Project description

Uncovering immune evasion mechanisms of leukemic cells from natural killer cells. In this sub-project RNA sequencing of leukemic cells is used to discover transcriptional changes resulting in NK cell-resistance. There is another sub-project using ATAC-seq (transposase-accessible chromatin) to epigenetic changes resulting in NK cell-resistance - this is not part of this report.

Experimental design

Taken from discussion summary (‘21009_Porject discussion summary.docx’):

  • 4 different cell lines (p185#13G, p185#13H, p185#15M, p185#15O). ADD description about cell line differences.
  • All conditions in technical triplicates and 2 biological replicates (biological replicates can be added any time, if there is a need).
  • Technical replicate means that we already start the co-culture in a separate well.

Biological and technical replicates

Timeline of the experiment

  • On time point Day 4 we FACS sort the original tumor cells (like Day 0) and the tumor cells which were co-cultured for 4 days with NK cells for the following analyses: ATAC seq, RNA seq and barcode analysis. Additionally we co-culture a certain amount of the same tumor cells again with NK cells for the analysis of Day 10.
  • On time point Day 10 we FACS sort the tumor cells which were co-cultured for 10 days with NK cells for the same analyses: ATAC seq, RNA seq and barcode analysis.

Experiment Timepoints

RNA-seq data analysis

RNA-seq methods: QuantSeq 3’ mRNA-Seq - 1x50bp HiSeq 3000/4000.

Pre-processing: ADD methods+versions for preprocessing Pre-processing pipeline

Downstream analysis: ADD methods+versions for downstream!

MultiQC report ADD full multiqc report description. Few observations:

# importing only key functions that are actually used - not to polute namespace!
import::from(readr, read_csv)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup)  # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)
import::from(.from = SummarizedExperiment, colData, assay) # used in every .Rmd
import::from(.from = tableone, CreateTableOne)

Preparing datasets

# importing only key functions that are actually used - not to polute namespace!
import::from(.from = readr, read_csv, cols)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup)  # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)

import::from(.from = GenomicFeatures, makeTxDbFromGFF)
import::from(.from = AnnotationDbi, annot_db_keys = keys, annot_db_select = select)
import::from(.from = DESeq2, .all=TRUE)
import::from(.from = tximport, tximport)

import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/generateEnsemblAnnotation.R"), "generateEnsemblAnnotation", .character_only=TRUE) # used for filtering
experiment_name="nk_tum_immunoedit_complete" # this can be a subset analysis - e.g. just batch1,...
experiment_design="1"  # used only to construct initial dds object
abs_filt_samples=3

# parameters for annotation
biomart_host = "http://nov2020.archive.ensembl.org"
#biomart_host = "https://www.ensembl.org"
biomart_Ens_version = "Ensembl Genes 102"
biomart_dataset="mmusculus_gene_ensembl"

# directories
output_dir = paste0("results_dir", "/",experiment_name,"/")
dir.create(output_dir)

Preparing metadata and expression data and combining into a dds (DESeqDataSet object) that is ready for analyses. dds object is further pre-filtered to remove lowly expressed genes (FPM (fragments per million mapped fragments) > 1 in at least 3 samples). Salmon quantification (quant.sf) transcript abundance estimates (transcript expression) is imported and converted to gene expression using tximport.

# Preparing metadata
rnaseq_metadata_file <- here::here(config$metadata$rnaseq$rnaseq_metadata)
rnaseq_metadata_df_raw <- readr::read_csv(file = rnaseq_metadata_file)  # 133 samples; 42 samples from other project?
quantseq_files <- here::here(config$metadata$rnaseq$rnaseq_salmon_files)

# salmon quant files
quant_files_md5sums_raw <- readr::read_table(file = quantseq_files, col_names = c("md5sum", "file_path"))
quant_files_md5sums <- quant_files_md5sums_raw %>%
  dplyr::mutate(absolute_quant_files_path = gsub(pattern = "\\./", replacement = paste0(dirname(quantseq_files), "/"), file_path),
                sample_name = gsub(pattern = "(\\./)(S_.+)(_0_.+)", replacement = "\\2", file_path)) %>%
  # for batch3
  dplyr::mutate(sample_name = gsub(pattern = "(\\./)(R.+)(_S.+)", replacement = "\\2", sample_name)) %>%
  # EK were samples from other experiment - they were removed from quant.sf list, but in case remove them here as well!
  dplyr::filter(!grepl(pattern = "\\./EK.+", x = sample_name)) %>%
  dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(S_.+)(_quant/quant.sf)", replacement = "\\2", x = file_path)) %>%
  # for batch 3
  dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(R.+_S.+)(_quant/quant.sf)", replacement = "\\2", x = bsf_sample_name))

# experiment info - match experiment to run
experiment_info_batch1_2_raw <- readr::read_csv("datasets/metadata/rnaseq_merge_sample_sheet.csv", col_types =cols(.default = "c"))
experiment_info_batch3_raw <- readr::read_csv("datasets/metadata/rnaseq_sample_sheet_batch3.csv", col_types = cols(.default = "c"))

experiment_info_raw <- dplyr::bind_rows(experiment_info_batch1_2_raw, experiment_info_batch3_raw)

# EK samples are from different experiment
experiment_info <- experiment_info_raw %>%
  dplyr::filter(!grepl(pattern = "EK.+", x = original_sample_name)) %>%
  dplyr::rename(library_name = experiment,
                bsf_sample_name = sample_name) %>%
  # fixing library_name for experiment EXP9.8
  dplyr::mutate(library_name = dplyr::if_else(library_name == "BSF_1241", "R0128_L5464", library_name)) %>%
  dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=original_sample_name) ~ paste0(original_sample_name, "_", library_name),
                                                      TRUE ~ paste0("S_", original_sample_name, "_", library_name)))
# loading metadata and adding experiment and salmon data
rnaseq_metadata_df <- rnaseq_metadata_df_raw %>%
  dplyr::mutate(condition_complete = condition) %>%
  dplyr::mutate(condition = gsub(pattern = "(.+)(_Day.+)", replacement = "\\1", condition_complete)) %>%
  dplyr::mutate(condition = gsub(pattern = "\\+", replacement = "_plus_", condition),
                timepoint_cell_harvesting = gsub(pattern = " ", replacement = "_", timepoint_cell_harvesting),
                experiment = gsub(pattern = " ", replacement = "", original_experiment)) %>%
  dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=sample_name) ~ paste0(sample_name, "_", library_name),
                   TRUE ~ paste0("S_", sample_name, "_", library_name)))

quant_files_md5sums_annot <- quant_files_md5sums %>%
  dplyr::left_join(., experiment_info, by = "bsf_sample_name") %>%
  dplyr::select(-sample_name, -library_name)

rnaseq_metadata_df_annot <- rnaseq_metadata_df %>%
  dplyr::left_join(., quant_files_md5sums_annot, by="sample_name_unique") %>%
  dplyr::mutate(original_sample_name = sample_name) %>%
  dplyr::mutate(sample_name = sample_name_unique) %>%
  dplyr::mutate(filenames = sample_name_unique)

#table(rnaseq_metadata_df_annot$original_experiment)
#table(rnaseq_metadata_df_annot$cell_line_label)
rnaseq_metadata_df_annot_reduced <- rnaseq_metadata_df_annot %>%
  #dplyr::filter(original_experiment == "EXP9_6") %>%
  #dplyr::filter(condition %in% c("Tumor_only", "Tumor_plus_NK_Day14")) %>%
  dplyr::mutate(technical_replicate = factor(technical_replicate, levels = c("1", "2", "3")),
                condition = factor(condition, levels = c("Tumor_only", "Tumor_plus_NK", "Tumor_plus_IFNg", "Tumor_plus_WT_NK", "Tumor_plus_PrfKO_NK")),
                timepoint_cell_harvesting = factor(timepoint_cell_harvesting, levels = c("timepoint_0", "timepoint_1", "timepoint_2")),
                cell_line_label = as.factor(cell_line_label),
                experiment = factor(experiment, levels = c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7", "EXP9.8")),
                condition_tp = paste0(condition,"_",timepoint_cell_harvesting)) %>%
  dplyr::mutate(condition_tp = factor(condition_tp, levels = c("Tumor_only_timepoint_0", 
                                                               "Tumor_only_timepoint_1", "Tumor_only_timepoint_2",
                                                               "Tumor_plus_NK_timepoint_1", "Tumor_plus_NK_timepoint_2",
                                                               "Tumor_plus_WT_NK_timepoint_2",
                                                               "Tumor_plus_IFNg_timepoint_2", "Tumor_plus_PrfKO_NK_timepoint_2"))) %>%
  #fixing day_cell_harvesting 
  dplyr::mutate(day_cell_harvesting = gsub(pattern = " ", replacement = "", day_cell_harvesting)) %>%
  dplyr::mutate(day_cell_harvesting = factor(day_cell_harvesting, levels=c("Day0", "Day4", "Day6", "Day10", "Day14", "Day16")))

metadata <- rnaseq_metadata_df_annot_reduced #cond_data
#rm(reseq_metadata)
rownames(metadata) <- metadata$filenames

# use alternative sample names for multiQC
# renaming for the multiQC report
# metadata_multiqc <- metadata %>%
#   dplyr::select(bsf_sample_name, experiment, technical_replicate, cell_line_label, condition, timepoint_cell_harvesting) %>%
#   dplyr::mutate(cell_line_label = paste0("cl", cell_line_label), 
#                 condition = gsub(pattern = "Tumor", replacement = "T", x = condition),
#                 timepoint_cell_harvesting = gsub(pattern = "timepoint", replacement = "tp", x=timepoint_cell_harvesting),
#                 technical_replicate = paste0("repl",technical_replicate)) %>%
#   dplyr::mutate(alt_name = paste(experiment, cell_line_label, technical_replicate, condition, timepoint_cell_harvesting, sep="-")) %>%
#   dplyr::select(bsf_sample_name, alt_name)
# 
# readr::write_tsv(metadata_multiqc, file = paste0(OUTPUT_DIR, experiment_name, "_metadata_multiqc.tsv"))
#TODO:
# [ ] - add md5sums for tx2gene_file
gtf_file <- file.path(annotation_dir, "Mus_musculus.GRCm38.102.gtf") # needed only once to generate tx2gene.tsv
tx2gene_file <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.tsv")
tx2gene_file_md5sum <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.md5sum")

if (!file.exists(tx2gene_file)) {
  message(tx2gene_file, " does not exist: generating!")
  txDB <- GenomicFeatures::makeTxDbFromGFF(gtf_file, format="gtf")
  
  # AnnotationDbi::keys
  tx_name <- annot_db_keys(txDB, keytype="TXNAME")
  
  # AnnotationDbi::select
  tx2gene_gtf <- annot_db_select(txDB,
                                 keys = tx_name,
                                 columns="GENEID",
                                 keytype = "TXNAME")
  
  
  # saving tx2gene into destdir
  write.table(tx2gene_gtf,
              file = tx2gene_file,
              sep = "\t",
              col.names = FALSE,
              row.names = FALSE,
              quote = FALSE)
  
  tx2gene_md5sum <- digest::digest(tx2gene_file, file=TRUE, algo="md5")
  write.table(paste(tx2gene_md5sum, basename(tx2gene_file), collapse = "\t"),
              file = tx2gene_file_md5sum,
              sep = "\t",
              col.names = FALSE,
              row.names = FALSE,
              quote = FALSE)
  
} else {
  message(tx2gene_file, " exists: loading!")
  tx2gene <- readr::read_tsv(tx2gene_file,
                             col_names = c("tx_id", "gene_id"),
                             show_col_types = FALSE)
}
precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData")
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)

precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)

if(!file.exists(precomputed_objects_file)){
  cat("RNA objects file '", precomputed_objects_filename, "' does not exist in the OUTPUT_DIR...\n")
  cat("generating", precomputed_objects_filename, "\n")
  # Generating dds and filtered dds objects ----
  #require(DESeq2)
  # here there is a single sample so we use ~1.
  # expect a warning that there is only a single sample...
  
  # generating dds object ----
  # txi_object <- generateTxi(metadata_object=metadata,
  #                           column_names=c("absolute_quant_files_path", "filenames"),
  #                           tx2gene = tx2gene)
  
  quant_files <- metadata$absolute_quant_files_path
  names(quant_files) <- metadata$sample_name
  
  txi_object <- tximport::tximport(quant_files,
                         type="salmon",
                         tx2gene = tx2gene,
                         ignoreTxVersion = TRUE)
  
  sum(rownames(metadata) == colnames(txi_object$counts)) # check if names match between metadata and counts data
  cat("... txi_object\n")
  
  dds <- DESeq2::DESeqDataSetFromTximport(txi_object, 
                                          colData = metadata, 
                                          design = as.formula(paste0("~", experiment_design)))
  
  cat("... dds\n")
  dds <- DESeq2::estimateSizeFactors(dds)
  dds <- DESeq2::DESeq(dds, minReplicatesForReplace=Inf) # do not replace outliers based on replicates
  
  # filtering dds
  cat("... dds filtering\n")
  dds_filt <- filterDatasets(dds, abs_filt = TRUE, 
                             abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
  dds_filt <- DESeq2::estimateSizeFactors(dds_filt)

  # Generate annotation file using biomaRt
  #FIXME:
  # Issues with Curl when using https:// and http:// throws a warning:
  #  Warning: Ensembl will soon enforce the use of https.
  #   Ensure the 'host' argument includes "https://"
  cat("... fetching annotation from biomart\n")
  ensemblAnnot <- generateEnsemblAnnotation(ensembl_ids = rownames(dds),
                                            host=biomart_host,
                                            version=biomart_Ens_version,
                                            dataset=biomart_dataset)
  
  cat("saving...", precomputed_objects_file, "\n")
  cat("...into:", output_dir, "\n")
  save(dds, dds_filt, ensemblAnnot,  file = precomputed_objects_file)
  
} else{
  cat("RNA objects file '", precomputed_objects_filename, "' exist...loading\n")
  load(precomputed_objects_file)
}
## RNA objects file ' nk_tum_immunoedit_complete_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_file)){
  # transformation
  log2_norm_filt <- DESeq2::normTransform(dds_filt)
  vsd_filt <- DESeq2::vst(dds_filt, blind = TRUE) # blind = TRUE for QC
  #rld_filt <- rlog(dds_filt, blind = TRUE)
  #rld_filt - too big for >100 samples; skipping!
  #rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
  save(log2_norm_filt, vsd_filt,   
       file = precomputed_transf_objects_file)
} else{
  load(precomputed_transf_objects_file)
}

Parameters for dataset preparation:

Parameter Value Decription
experiment_name nk_tum_immunoedit_complete this can be a subset analysis - e.g. just batch1,…
experiment_design 1 used only to construct initial dds object
abs_filt_samples 3 # at least in N samples, which is a smallest group size
biomart_host http://nov2020.archive.ensembl.org
biomart_Ens_version Ensembl Genes 102
biomart_dataset mmusculus_gene_ensembl

Objects overview:

dds_objects_overview <- data.frame(dds_object = c("dds", "dds_filt"),
                                   n_samples = c(ncol(dds), ncol(dds_filt)),
                                   n_genes = c(nrow(dds), nrow(dds_filt)))

dds_objects_overview %>%
  kableExtra::kbl(caption = "dds objects overview") %>% 
  kableExtra::kable_classic(full_width = T)
dds objects overview
dds_object n_samples n_genes
dds 169 51149
dds_filt 169 15430
# DT::datatable(dds_objects_overview,
#           options = list(pageLength = 10),
#           caption = 'Table 1: Samples in validation set')

Exploratory Data Analysis - metadata

#import::from(.from = SummarizedExperiment, colData, assay)
import::from(.from = DT, datatable)
import::from(.from = ggplot2, .all=TRUE)
import::from(.from = DataExplorer, plot_bar)
#import::from(.from = tableone, CreateTableOne)
import::from(.from = dplyr, group_by, count)
experiment_name="nk_tum_immunoedit_complete"
output_dir <- paste0(results_dir, "/",experiment_name,"/") # "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/" #  
# results_dir defined in the nk_immunoediting.Rmd file

eda_plots_dir <- paste0(output_dir, "eda_plots/")
dir.create(eda_plots_dir)

# raw and filtered dds
precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData") #"nk_tum_immunoedit_complete_dds_objects.RData" #
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)

# log2 and vsd transformed
precomputed_transf_objects_filename <- "nk_tum_immunoedit_complete_log2_vsd_filt_objects.RData"
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)

# check if object loaded if not load
if(!exists(precomputed_objects_file)){
  load(precomputed_objects_file)
  load(precomputed_transf_objects_file)
} 

metadata <- tibble::as_tibble(SummarizedExperiment::colData(dds_filt))

key_metadata <- metadata %>%
  dplyr::select(experiment, condition_tp,condition, timepoint_cell_harvesting, day_cell_harvesting, cell_line_label, technical_replicate) 

# saving metadata
openxlsx::write.xlsx(metadata, file = file.path(output_dir, paste0(experiment_name, "_metadata.xlsx")))

Metadata overview:

# 'KeyTable', ,'FixedHeader', 'Responsive', 'Scroller', 'ColReorder'
# to add show entries - https://stackoverflow.com/questions/64976684/how-to-have-buttons-and-a-show-entries-using-datatables-dt-in-rmarkdown

DT::datatable(data = metadata,
              caption = 'Metadata overview',
              filter = 'top',
              class = 'hover',
    extensions = c('Buttons'), options = list(
      pageLength = 5,
      #autoWidth = TRUE
      #fixedHeader = TRUE, 
      scrollX = TRUE,
      #scrollY = TRUE,
      #scrollY = 200,
      #deferRender = TRUE,
      #scroller = TRUE,
      #colReorder = TRUE,
      #keys = TRUE,
      searchHighlight = TRUE,
      dom = 'Blfrtip',
      buttons = list('copy', 'colvis', list(
        extend = 'collection',
        buttons = c('csv', 'excel', 'pdf'),
        text = 'Download'
      ))
  )
)
# 1. metadata overview
# 2. composition per experiment
# 3. relationship (table(x, y)) - to see how many variables we have in each category; data imbalance!?

# frequency of key variables ----
# fig.show='hide' - to supress plotting here
key_variables_freq <- DataExplorer::plot_bar(key_metadata, order_bar=TRUE, ggtheme = theme_bw(), title = "Frequency of key variables")
# table of variable frequency per experiment ----
key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata, -experiment)), 
                           strata = c("experiment"), 
                           data = key_metadata)

# relate variables ----
# cat_cat_plot <- ggplot(data = key_metadata, color=cell_line_label) +
#   geom_count(mapping = aes(x = experiment, y = condition_tp)) #+   facet_wrap(~cell_line_label)

cat_cat_counts <- key_metadata %>% 
  dplyr::group_by(cell_line_label) %>%
  dplyr::count(experiment, condition_tp)

cat_cat_plot <- ggplot(data = cat_cat_counts, mapping = aes(x = experiment, y = condition_tp, group=cell_line_label)) +
  #     geom_tile(mapping = aes(fill = n))
  geom_point(mapping=aes(size=n, color=cell_line_label), position = position_dodge(width=0.5)) +
  scale_size(breaks = c(0,1,2,3,4,5,6)) +
  # ggrepel::geom_label_repel(aes(label = n),
  #                           box.padding   = 0.35, 
  #                           point.padding = 0.5,
  #                           segment.color = 'grey50') +
  ggtitle("Number of samples pre condition/experiment.") +
  theme_bw()

Frequency of key variable:

print(key_variables_freq$page_1)

ggsave(filename = paste0(eda_plots_dir, "key_variables_freq_plot.png"), 
       plot=key_variables_freq$page_1,
       width = 20, height = 20, units = "cm")

Overview of number of samples in different categories (experiment, condition,…).:

#print(key_variables_tableOne$CatTable)
tableone::kableone(key_variables_tableOne$CatTable,
                   caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
  kableExtra::kable_material(c("striped", "hover"))
Overview of number of samples in different categories (experiment, condition,…).
EXP9.4 EXP9.5 EXP9.6 EXP9.7 EXP9.8 p test
n 18 36 39 40 36
condition_tp (%) <0.001
Tumor_only_timepoint_0 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
Tumor_only_timepoint_1 6 (33.3) 12 (33.3) 12 (30.8) 12 (30.0) 6 (16.7)
Tumor_only_timepoint_2 0 ( 0.0) 0 ( 0.0) 4 (10.3) 4 (10.0) 6 (16.7)
Tumor_plus_NK_timepoint_1 6 (33.3) 12 (33.3) 12 (30.8) 12 (30.0) 0 ( 0.0)
Tumor_plus_NK_timepoint_2 6 (33.3) 12 (33.3) 11 (28.2) 12 (30.0) 0 ( 0.0)
Tumor_plus_WT_NK_timepoint_2 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
Tumor_plus_IFNg_timepoint_2 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
Tumor_plus_PrfKO_NK_timepoint_2 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
condition (%) <0.001
Tumor_only 6 (33.3) 12 (33.3) 16 (41.0) 16 (40.0) 18 (50.0)
Tumor_plus_NK 12 (66.7) 24 (66.7) 23 (59.0) 24 (60.0) 0 ( 0.0)
Tumor_plus_IFNg 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
Tumor_plus_WT_NK 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
Tumor_plus_PrfKO_NK 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
timepoint_cell_harvesting (%) <0.001
timepoint_0 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
timepoint_1 12 (66.7) 24 (66.7) 24 (61.5) 24 (60.0) 6 (16.7)
timepoint_2 6 (33.3) 12 (33.3) 15 (38.5) 16 (40.0) 24 (66.7)
day_cell_harvesting (%) <0.001
Day0 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 6 (16.7)
Day4 12 (66.7) 24 (66.7) 24 (61.5) 0 ( 0.0) 6 (16.7)
Day6 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 24 (60.0) 0 ( 0.0)
Day10 6 (33.3) 12 (33.3) 0 ( 0.0) 0 ( 0.0) 0 ( 0.0)
Day14 0 ( 0.0) 0 ( 0.0) 15 (38.5) 0 ( 0.0) 24 (66.7)
Day16 0 ( 0.0) 0 ( 0.0) 0 ( 0.0) 16 (40.0) 0 ( 0.0)
cell_line_label (%) <0.001
A 0 ( 0.0) 9 (25.0) 10 (25.6) 10 (25.0) 18 (50.0)
B 0 ( 0.0) 9 (25.0) 10 (25.6) 10 (25.0) 0 ( 0.0)
C 9 (50.0) 9 (25.0) 10 (25.6) 10 (25.0) 0 ( 0.0)
D 9 (50.0) 9 (25.0) 9 (23.1) 10 (25.0) 18 (50.0)
technical_replicate (%) 0.998
1 6 (33.3) 12 (33.3) 16 (41.0) 16 (40.0) 12 (33.3)
2 6 (33.3) 12 (33.3) 12 (30.8) 12 (30.0) 12 (33.3)
3 6 (33.3) 12 (33.3) 11 (28.2) 12 (30.0) 12 (33.3)

Plot of number of samples in each condition across different experiments:

print(cat_cat_plot)

ggsave(filename = paste0(eda_plots_dir, "key_variables_relate_plot.png"), 
       plot=cat_cat_plot,
       width = 20, height = 20, units = "cm")

Exploratory Data Analysis - expression

# import::from(.from = SummarizedExperiment, colData)
# import::from(.from = DT, datatable)
# import::from(.from = ggplot2, .all=TRUE)
# import::from(.from = DataExplorer, plot_bar)
# import::from(.from = tableone, CreateTableOne)
# import::from(.from = dplyr, group_by, count)

#import::from(.from = SummarizedExperiment, colData, assay)
import::from(.from = variancePartition, fitExtractVarPartModel, sortCols, plotVarPart)
import::from(.from = doParallel, registerDoParallel)
import::from(.from = parallel, makeCluster, stopCluster)

# import utils scripts
import::from(.from = here::here("utils/rnaSelectTopVarGenes.R"), "rnaSelectTopVarGenes", .character_only=TRUE)
import::from(.from = here::here("utils/edaFunctions.R"), "varPartitionEstimate", "generatePCA", "pcaExtractVariance", "pcaPlotVariance", "pcaCorrPCs", "pcaCorrPCsPlot", .character_only=TRUE)
experiment_name="nk_tum_immunoedit_complete"
output_dir <- paste0(results_dir, "/",experiment_name,"/") # "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/" 
# results_dir defined in the nk_immunoediting.Rmd file

eda_plots_dir <- paste0(output_dir, "eda_plots/")
dir.create(eda_plots_dir)

# raw and filtered dds
precomputed_objects_filename <-paste0(experiment_name, "_dds_objects.RData") # "nk_tum_immunoedit_complete_dds_objects.RData" 
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)

# log2 and vsd transformed
precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData") #"nk_tum_immunoedit_complete_log2_vsd_filt_objects.RData"
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)

# check if object loaded if not load
if(!exists(precomputed_objects_file)){
  load(precomputed_objects_file)
  load(precomputed_transf_objects_file)
} 

metadata <- tibble::as_tibble(SummarizedExperiment::colData(dds_filt))

key_metadata <- metadata %>%
  dplyr::select(experiment, condition_tp,condition, timepoint_cell_harvesting, day_cell_harvesting, cell_line_label, technical_replicate) 

# saving metadata
#openxlsx::write.xlsx(metadata, file = file.path(output_dir, paste0(experiment_name, "_metadata.xlsx")))

# preparing vsd per experiment ----
experiments <- unique(as.character(vsd_filt$experiment))
vsd_filt_perExp <- purrr::map(.x = experiments, .f = function(experiment_name){
  vsd_filt[,vsd_filt$experiment == experiment_name]
}) %>% setNames(experiments)
#lapply(vsd_filt_perExp, dim)
cond_interest <- "condition_tp"   # column of interest for PCA plots etc.
cond_interest_varPart <- c("condition_tp", "condition", "timepoint_cell_harvesting", "cell_line_label", "technical_replicate", "experiment")
cond_interest_varPart_corr <- c("condition_tp", "cell_line_label", "experiment", "technical_replicate")
  
# #~  (1|technical_replicate) + (1|experiment) + (1|cell_line_label) + (1|condition_tp) 
fitform_partVariance <- ~(1 | technical_replicate) + (1 | experiment) + (1 | cell_line_label) + (1 | timepoint_cell_harvesting) + (1 | condition) 
padj_cutoff = 0.05
log2FC_cutoff = 0.58 #(FC=1.5); log2FC=1.0 # (FC=2)
var_expl_needed <- 0.6         # at least 60% variance explained needed
transf_object <- vsd_filt # which object to use for pca, variance partition etc. in case there is e.g. log2norm, rlog,...

Variance partition

# calculating overall variance partition ----
variance_partition_all <- varPartitionEstimate(transf_object = transf_object, 
                                              fitform_partVariance = fitform_partVariance,
                                              ntop_genes=1000,
                                              ncores = 12)
## Selecting 1000 Ntop genes based on variance.
# calculating variance partition per experiment ----
fitform_partVariance_perExperiment <- ~(1 | technical_replicate) + (1 | cell_line_label) + (1 | timepoint_cell_harvesting) + (1 | condition)
variance_partition_perExperiment <- purrr::map(.x=names(vsd_filt_perExp), .f=function(experiment_name){
  varPartitionEstimate(transf_object = vsd_filt_perExp[[experiment_name]], 
                                              fitform_partVariance = fitform_partVariance_perExperiment,
                                              ntop_genes=1000,
                                              ncores = 12)
}) %>% setNames(names(vsd_filt_perExp))
## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
#print(key_variables_tableOne$CatTable)
variance_partition_all_plot <- variance_partition_all$varPart_plot_annot + ggtitle("Variance partition all experiments")

print(variance_partition_all_plot)

ggsave(filename = paste0(eda_plots_dir, "variance_partition_all_plot.png"), 
       plot=variance_partition_all_plot,
       width = 20, height = 20, units = "cm")

Variance explained by different variables of interest. There seems to be a lot of variation coming from different experiments and cell lines. This will need to be accounted in the design or downstream batch effect removal.

#print(key_variables_tableOne$CatTable)
variance_partition_all$varPart_stats %>%
  kableExtra::kbl(caption="Variance partition table - all experiments") %>%
  kableExtra::kable_material(c("striped", "hover"))
Variance partition table - all experiments
variable median_varExplained mean_varExplained IQR_varExplained max_varExplained min_varExplained
experiment 26.5 35.9 52.3 99.7 0.0
cell_line_label 2.8 13.4 10.4 96.6 0.0
condition 0.8 7.7 8.0 82.8 0.0
timepoint_cell_harvesting 0.0 1.2 0.9 33.3 0.0
technical_replicate 0.0 0.3 0.2 6.8 0.0
Residuals 34.1 41.5 46.0 100.0 0.3

*Please note color coding varies between variance_partition_perExperiment

variance_partition_perExperiment_plots <- purrr::map(.x=names(variance_partition_perExperiment), .f = function(experiment_name){
  variance_partition_perExperiment[[experiment_name]]$varPart_plot_annot + ggtitle(paste0("Variance partition experiment - ", experiment_name))
}) %>% setNames(names(variance_partition_perExperiment))

variance_partition_perExperiment_plots <- purrr::map(.x=names(variance_partition_perExperiment), .f = function(experiment_name){
  print(variance_partition_perExperiment_plots[[experiment_name]])
  ggsave(filename = paste0(eda_plots_dir, paste0("variance_partition_", experiment_name,"_plot.png")), 
       plot=variance_partition_perExperiment_plots[[experiment_name]],
       width = 20, height = 20, units = "cm")
}) %>% setNames(names(variance_partition_perExperiment))

Principal Component Analysis (PCA)

# calculating how many components are needed ----
transf_object_counts_ntop <- rnaSelectTopVarGenes(SummarizedExperiment::assay(transf_object), ntop = 1000, type = "var")
## Selecting 1000 Ntop genes based on variance.
pca_object <- stats::prcomp(t(transf_object_counts_ntop), center = TRUE)
pca_results <- pcaExtractVariance(pca_object)
pca_scree_cumVarExpl <- pcaPlotVariance(pca_results, var_expl_needed = var_expl_needed)
min_components_needed <- which(pca_results$cummulative_variance > var_expl_needed)[1] # min components to get var_expl_needed 

# calculating components correlation with variables ----
#TODO:
# fix - correlation between PCs and variables of interest! - fix sample naming!
# PC_corr <- pcaCorrPCs(pca_object, metadata[cond_interest_varPart_corr], pcs = 1:min_components_needed, pval_exact = TRUE) 
# PC_corr_plot <- pcaCorrPCsPlot(PC_corr)

# calculating overall PCA ----
condition_tp_colors <- RColorBrewer::brewer.pal(length(unique(metadata$condition_tp)),"Dark2")
names(condition_tp_colors) <- levels(metadata$condition_tp)
    
pca_all <- generatePCA(transf_object = transf_object, 
                       cond_interest_varPart = cond_interest_varPart, 
                       color_variable = "condition_tp", 
                       shape_variable = "experiment",
                       ntop_genes = 1000) +
      ggtitle("Overall PCA") +
      ggplot2::scale_color_manual(values = condition_tp_colors)   
 
pca_perExperiment_plots <- purrr::map(.x=names(vsd_filt_perExp), .f = function(experiment_name){
    pca_plot <- generatePCA(transf_object = vsd_filt_perExp[[experiment_name]], 
                cond_interest_varPart = cond_interest_varPart, 
                color_variable = "condition_tp", 
                shape_variable = "cell_line_label",
                ntop_genes = 1000)
  
    pca_plot + ggtitle(experiment_name) +
      # make universal color, shape across the whole report!
      ggplot2::scale_shape_manual(values = c("A" = 16, "B" = 15, "C"=17, "D"=3)) +
      ggplot2::scale_color_manual(values = condition_tp_colors)   
}) %>% setNames(names(vsd_filt_perExp))

combined_pca_plot <- ggpubr::ggarrange(pca_perExperiment_plots$EXP9.4, 
                                       pca_perExperiment_plots$EXP9.5,
                                       pca_perExperiment_plots$EXP9.6,
                                       pca_perExperiment_plots$EXP9.7,
                                       pca_perExperiment_plots$EXP9.8, 
                                       nrow=3, ncol=2,
                                       common.legend = FALSE) #patchwork::wrap_plots(pca_perExperiment_plots, ncol=2)
print(pca_scree_cumVarExpl)

ggsave(filename = paste0(eda_plots_dir, "pca_scree_cumVarExpl_plot.png"), 
       plot=pca_scree_cumVarExpl,
       width = 20, height = 20, units = "cm")

Overall PCA plot:

print(pca_all)

ggsave(filename = paste0(eda_plots_dir, "pca_all_plot.png"), 
       plot=pca_all,
       width = 20, height = 20, units = "cm")

PCA plot per experiment:

print(combined_pca_plot)

ggsave(filename = paste0(eda_plots_dir, "pca_perExperiment_plot.png"), 
       plot=combined_pca_plot,
       width = 20, height = 20, units = "cm")

Results - TpNK_tp2_vs_Tonly_tp1

Copied completely from the “Prepare dataset.Rmd”

Trying to keep everythin in one place

The analysis is similar to the previous version, however here I don’t do the correction for the batch correction between AB and CD

# importing only key functions that are actually used - not to polute namespace!
import::from(.from = readr, read_csv, cols)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup)  # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)

import::from(.from = GenomicFeatures, makeTxDbFromGFF)
import::from(.from = AnnotationDbi, annot_db_keys = keys, annot_db_select = select)
import::from(.from = DESeq2, .all=TRUE)
import::from(.from = tximport, tximport)

import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/generateEnsemblAnnotation.R"), "generateEnsemblAnnotation", .character_only=TRUE) # used for filtering
#project setup
# loading config file ----
config_file <- file.path( "nk_tum_immunoediting_config.yaml")  # params$config_file; params not found? ; change to project_config.yaml
config <- yaml::read_yaml(config_file)
#config <- yaml::read_yaml(params$config_file)

experiment_name="nk_tum_immunoedit_complete" # this can be a subset analysis - e.g. just batch1,...
#experiment_name="nk_tum_immunoedit_complete_ab2_AB_CD_sep" # this can be a subset analysis - e.g. just batch1,...
experiment_design="1"  # used only to construct initial dds object
abs_filt_samples=3

# parameters for annotation
biomart_host = "http://nov2020.archive.ensembl.org"
#biomart_host = "https://www.ensembl.org"
biomart_Ens_version = "Ensembl Genes 102"
biomart_dataset="mmusculus_gene_ensembl"

# directories
output_dir = paste0("./results_dir", "/",experiment_name,"/")
dir.create(output_dir)

Preparing metadata and expression data and combining into a dds (DESeqDataSet object) that is ready for analyses. dds object is further pre-filtered to remove lowly expressed genes (FPM (fragments per million mapped fragments) > 1 in at least 3 samples). Salmon quantification (quant.sf) transcript abundance estimates (transcript expression) is imported and converted to gene expression using tximport.

# Preparing metadata
rnaseq_metadata_file <- here::here(config$metadata$rnaseq$rnaseq_metadata)
rnaseq_metadata_df_raw <- readr::read_csv(file = rnaseq_metadata_file)  # 133 samples; 42 samples from other project?
quantseq_files <- here::here(config$metadata$rnaseq$rnaseq_salmon_files)

# salmon quant files
quant_files_md5sums_raw <- readr::read_table(file = quantseq_files, col_names = c("md5sum", "file_path"))
quant_files_md5sums <- quant_files_md5sums_raw %>%
  dplyr::mutate(absolute_quant_files_path = gsub(pattern = "\\./", replacement = paste0(dirname(quantseq_files), "/"), file_path),
                sample_name = gsub(pattern = "(\\./)(S_.+)(_0_.+)", replacement = "\\2", file_path)) %>%
  # for batch3
  dplyr::mutate(sample_name = gsub(pattern = "(\\./)(R.+)(_S.+)", replacement = "\\2", sample_name)) %>%
  # EK were samples from other experiment - they were removed from quant.sf list, but in case remove them here as well!
  dplyr::filter(!grepl(pattern = "\\./EK.+", x = sample_name)) %>%
  dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(S_.+)(_quant/quant.sf)", replacement = "\\2", x = file_path)) %>%
  # for batch 3
  dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(R.+_S.+)(_quant/quant.sf)", replacement = "\\2", x = bsf_sample_name))

# experiment info - match experiment to run
experiment_info_batch1_2_raw <- readr::read_csv("datasets/metadata/rnaseq_merge_sample_sheet.csv", col_types =cols(.default = "c"))
experiment_info_batch3_raw <- readr::read_csv("datasets/metadata/rnaseq_sample_sheet_batch3.csv", col_types = cols(.default = "c"))

experiment_info_raw <- dplyr::bind_rows(experiment_info_batch1_2_raw, experiment_info_batch3_raw)

# EK samples are from different experiment
experiment_info <- experiment_info_raw %>%
  dplyr::filter(!grepl(pattern = "EK.+", x = original_sample_name)) %>%
  dplyr::rename(library_name = experiment,
                bsf_sample_name = sample_name) %>%
  # fixing library_name for experiment EXP9.8
  dplyr::mutate(library_name = dplyr::if_else(library_name == "BSF_1241", "R0128_L5464", library_name)) %>%
  dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=original_sample_name) ~ paste0(original_sample_name, "_", library_name),
                                                      TRUE ~ paste0("S_", original_sample_name, "_", library_name)))
# loading metadata and adding experiment and salmon data
rnaseq_metadata_df <- rnaseq_metadata_df_raw %>%
  dplyr::mutate(condition_complete = condition) %>%
  dplyr::mutate(condition = gsub(pattern = "(.+)(_Day.+)", replacement = "\\1", condition_complete)) %>%
  dplyr::mutate(condition = gsub(pattern = "\\+", replacement = "_plus_", condition),
                timepoint_cell_harvesting = gsub(pattern = " ", replacement = "_", timepoint_cell_harvesting),
                experiment = gsub(pattern = " ", replacement = "", original_experiment)) %>%
  dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=sample_name) ~ paste0(sample_name, "_", library_name),
                   TRUE ~ paste0("S_", sample_name, "_", library_name)))

quant_files_md5sums_annot <- quant_files_md5sums %>%
  dplyr::left_join(., experiment_info, by = "bsf_sample_name") %>%
  dplyr::select(-sample_name, -library_name)

rnaseq_metadata_df_annot <- rnaseq_metadata_df %>%
  dplyr::left_join(., quant_files_md5sums_annot, by="sample_name_unique") %>%
  dplyr::mutate(original_sample_name = sample_name) %>%
  dplyr::mutate(sample_name = sample_name_unique) %>%
  dplyr::mutate(filenames = sample_name_unique)

#table(rnaseq_metadata_df_annot$original_experiment)
#table(rnaseq_metadata_df_annot$cell_line_label)
rnaseq_metadata_df_annot_reduced <- rnaseq_metadata_df_annot %>%
  #dplyr::filter(original_experiment == "EXP9_6") %>%
  #dplyr::filter(condition %in% c("Tumor_only", "Tumor_plus_NK_Day14")) %>%
  dplyr::mutate(technical_replicate = factor(technical_replicate, levels = c("1", "2", "3")),
                condition = factor(condition, levels = c("Tumor_only", "Tumor_plus_NK", "Tumor_plus_IFNg", "Tumor_plus_WT_NK", "Tumor_plus_PrfKO_NK")),
                timepoint_cell_harvesting = factor(timepoint_cell_harvesting, levels = c("timepoint_0", "timepoint_1", "timepoint_2")),
                cell_line_label = as.factor(cell_line_label),
                experiment = factor(experiment, levels = c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7", "EXP9.8")),
                condition_tp = paste0(condition,"_",timepoint_cell_harvesting)) %>%
  dplyr::mutate(condition_tp = factor(condition_tp, levels = c("Tumor_only_timepoint_0", 
                                                               "Tumor_only_timepoint_1", "Tumor_only_timepoint_2",
                                                               "Tumor_plus_NK_timepoint_1", "Tumor_plus_NK_timepoint_2",
                                                               "Tumor_plus_WT_NK_timepoint_2",
                                                               "Tumor_plus_IFNg_timepoint_2", "Tumor_plus_PrfKO_NK_timepoint_2"))) %>%
  #fixing day_cell_harvesting 
  dplyr::mutate(day_cell_harvesting = gsub(pattern = " ", replacement = "", day_cell_harvesting)) %>%
  dplyr::mutate(day_cell_harvesting = factor(day_cell_harvesting, levels=c("Day0", "Day4", "Day6", "Day10", "Day14", "Day16")))

metadata <- rnaseq_metadata_df_annot_reduced #cond_data
#rm(reseq_metadata)
rownames(metadata) <- metadata$filenames

# use alternative sample names for multiQC
# renaming for the multiQC report
# metadata_multiqc <- metadata %>%
#   dplyr::select(bsf_sample_name, experiment, technical_replicate, cell_line_label, condition, timepoint_cell_harvesting) %>%
#   dplyr::mutate(cell_line_label = paste0("cl", cell_line_label), 
#                 condition = gsub(pattern = "Tumor", replacement = "T", x = condition),
#                 timepoint_cell_harvesting = gsub(pattern = "timepoint", replacement = "tp", x=timepoint_cell_harvesting),
#                 technical_replicate = paste0("repl",technical_replicate)) %>%
#   dplyr::mutate(alt_name = paste(experiment, cell_line_label, technical_replicate, condition, timepoint_cell_harvesting, sep="-")) %>%
#   dplyr::select(bsf_sample_name, alt_name)
# 
# readr::write_tsv(metadata_multiqc, file = paste0(OUTPUT_DIR, experiment_name, "_metadata_multiqc.tsv"))
#TODO:
# [ ] - add md5sums for tx2gene_file

data_dir <- file.path("datasets") 
annotation_dir = paste0(data_dir, "/annotations/")

gtf_file <- file.path(annotation_dir, "Mus_musculus.GRCm38.102.gtf") # needed only once to generate tx2gene.tsv
tx2gene_file <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.tsv")
tx2gene_file_md5sum <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.md5sum")

if (!file.exists(tx2gene_file)) {
  message(tx2gene_file, " does not exist: generating!")
  txDB <- GenomicFeatures::makeTxDbFromGFF(gtf_file, format="gtf")
  
  # AnnotationDbi::keys
  tx_name <- annot_db_keys(txDB, keytype="TXNAME")
  
  # AnnotationDbi::select
  tx2gene_gtf <- annot_db_select(txDB,
                                 keys = tx_name,
                                 columns="GENEID",
                                 keytype = "TXNAME")
  
  
  # saving tx2gene into destdir. Potentially there is a typo here. The name should be tx2gene, not _gtf
  write.table(tx2gene_gtf,
              file = tx2gene_file,
              sep = "\t",
              col.names = FALSE,
              row.names = FALSE,
              quote = FALSE)
  
  tx2gene_md5sum <- digest::digest(tx2gene_file, file=TRUE, algo="md5")
  write.table(paste(tx2gene_md5sum, basename(tx2gene_file), collapse = "\t"),
              file = tx2gene_file_md5sum,
              sep = "\t",
              col.names = FALSE,
              row.names = FALSE,
              quote = FALSE)
  
} else {
  message(tx2gene_file, " exists: loading!")
  tx2gene <- readr::read_tsv(tx2gene_file,
                             col_names = c("tx_id", "gene_id"),
                             show_col_types = FALSE)
}
precomputed_objects_filename <- paste0("nk_tum_immunoedit_complete", "_dds_objects.RData")
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)

precomputed_transf_objects_filename <- paste0("nk_tum_immunoedit_complete", "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)

if(!file.exists(precomputed_objects_file)){
  cat("RNA objects file '", precomputed_objects_filename, "' does not exist in the OUTPUT_DIR...\n")
  cat("generating", precomputed_objects_filename, "\n")
  # Generating dds and filtered dds objects ----
  #require(DESeq2)
  # here there is a single sample so we use ~1.
  # expect a warning that there is only a single sample...
  
  # generating dds object ----
  # txi_object <- generateTxi(metadata_object=metadata,
  #                           column_names=c("absolute_quant_files_path", "filenames"),
  #                           tx2gene = tx2gene)
  
  quant_files <- metadata$absolute_quant_files_path
  names(quant_files) <- metadata$sample_name
  
  txi_object <- tximport::tximport(quant_files,
                         type="salmon",
                         tx2gene = tx2gene,
                         ignoreTxVersion = TRUE)
  
  sum(rownames(metadata) == colnames(txi_object$counts)) # check if names match between metadata and counts data
  cat("... txi_object\n")
  
  dds <- DESeq2::DESeqDataSetFromTximport(txi_object, 
                                          colData = metadata, 
                                          design = as.formula(paste0("~", experiment_design)))
  
  cat("... dds\n")
  dds <- DESeq2::estimateSizeFactors(dds) # Isn't the data already normalized by Salmon?
  dds <- DESeq2::DESeq(dds, minReplicatesForReplace=Inf) # do not replace outliers based on replicates
  
  # filtering dds
  cat("... dds filtering\n")
  dds_filt <- filterDatasets(dds, abs_filt = TRUE, 
                             abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
  dds_filt <- DESeq2::estimateSizeFactors(dds_filt)

  # Generate annotation file using biomaRt
  #FIXME:
  # Issues with Curl when using https:// and http:// throws a warning:
  #  Warning: Ensembl will soon enforce the use of https.
  #   Ensure the 'host' argument includes "https://"
  cat("... fetching annotation from biomart\n")
  ensemblAnnot <- generateEnsemblAnnotation(ensembl_ids = rownames(dds),
                                            host=biomart_host,
                                            version=biomart_Ens_version,
                                            dataset=biomart_dataset)
  
  cat("saving...", precomputed_objects_file, "\n")
  cat("...into:", output_dir, "\n")
  save(dds, dds_filt, ensemblAnnot,  file = precomputed_objects_file)
  
} else{
  cat("RNA objects file '", precomputed_objects_filename, "' exist...loading\n")
  load(precomputed_objects_file)
}
## RNA objects file ' nk_tum_immunoedit_complete_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_file)){
  # transformation
  log2_norm_filt <- DESeq2::normTransform(dds_filt)
  vsd_filt <- DESeq2::vst(dds_filt, blind = TRUE) # blind = TRUE for QC
  #rld_filt <- rlog(dds_filt, blind = TRUE)
  #rld_filt - too big for >100 samples; skipping!
  #rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
  save(log2_norm_filt, vsd_filt,   
       file = precomputed_transf_objects_file)
} else{
  load(precomputed_transf_objects_file)
}

Parameters for dataset preparation:

Parameter Value Decription
experiment_name nk_tum_immunoedit_complete this can be a subset analysis - e.g. just batch1,…
experiment_design 1 used only to construct initial dds object
abs_filt_samples 3 # at least in N samples, which is a smallest group size
biomart_host http://nov2020.archive.ensembl.org
biomart_Ens_version Ensembl Genes 102
biomart_dataset mmusculus_gene_ensembl

Objects overview:

dds_objects_overview <- data.frame(dds_object = c("dds", "dds_filt"),
                                   n_samples = c(ncol(dds), ncol(dds_filt)),
                                   n_genes = c(nrow(dds), nrow(dds_filt)))

dds_objects_overview %>%
  kableExtra::kbl(caption = "dds objects overview") %>% 
  kableExtra::kable_classic(full_width = T)
dds objects overview
dds_object n_samples n_genes
dds 169 51149
dds_filt 169 15430
# DT::datatable(dds_objects_overview,
#           options = list(pageLength = 10),
#           caption = 'Table 1: Samples in validation set')

End of the data_prep steps

#import::from(.from = SummarizedExperiment, colData, assay)
import::from(.from = variancePartition, fitExtractVarPartModel, sortCols, plotVarPart)
import::from(.from = doParallel, registerDoParallel)
import::from(.from = parallel, makeCluster, stopCluster)

# import utils scripts
import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/rnaSelectTopVarGenes.R"), "rnaSelectTopVarGenes", .character_only=TRUE)
import::from(.from = here::here("utils/edaFunctions.R"), "varPartitionEstimate", "generatePCA", "pcaExtractVariance", "pcaPlotVariance", "pcaCorrPCs", "pcaCorrPCsPlot", "generatePCA_plus_shape", .character_only=TRUE)
import::from(.from = here::here("utils/generateResults.R"), "generateResults_upd", .character_only=TRUE) # 
import::from(.from = here::here("utils/plotDegResults.R"), "plotVolcano","plotVolcano_repel", .character_only=TRUE)

library(ggrepel)

Differential expression analysis

Comparison between Tumor_plus_NK vs Tumor_only - specifically Tumor_plus_NK_timepoint_2 against Tumor_only_timepoint_1. Tumor_plus_WT_NK_timepoint_2 was renamed to Tumor_plus_NK_timepoint_2

Steps:

  • subsetting dds_filt to contain only condition_tp of interest (Tumor_only_timepoint_1, Tumor_plus_NK_timepoint_2 = Tumor_plus_WT_NK_timepoint_2)
  • filter (again) for lowly expressed genes
  • results with differentially expressed genes (DEG) and significantly DEG at a selected cutoff
  • enrichment analysis (over-representation analysis) of the significantly DEG genes; GOBP, GOCC and GOMF terms enrichment
abs_filt_samples=3
padj_cutoff = 0.05
log2FC_cutoff = 0.58 #(FC=1.5); log2FC=1.0 # (FC=2)
var_expl_needed <- 0.6         # at least 60% variance explained needed

# compare Tumor_only between timepoints!
# condition_tp_subset <- c("Tumor_only_timepoint_0", 
#                          "Tumor_only_timepoint_1", 
#                          "Tumor_only_timepoint_2", 
#                          "Tumor_plus_NK_timepoint_1", 
#                          "Tumor_plus_NK_timepoint_2",
#                          "Tumor_plus_WT_NK_timepoint_2")

condition_tp_subset <- c("Tumor_only_timepoint_1",
                         "Tumor_plus_NK_timepoint_2",
                         "Tumor_plus_NK_timepoint_1",
                         "Tumor_plus_WT_NK_timepoint_2")

genes_of_interest <- list("Denn2b"="ENSMUSG00000031024",
                          "Gbp6"="ENSMUSG00000104713",
                          "H2-K1"="ENSMUSG00000061232",
                          "Hs3st2"="ENSMUSG00000046321",
                          "Htra3"="ENSMUSG00000029096",
                          "Ifi27"="ENSMUSG00000064215",
                          "Igtp"="ENSMUSG00000078853",
                          "Irf1"="ENSMUSG00000018899",
                          "Lgals3bp"="ENSMUSG00000033880",
                          "Ly6a"="ENSMUSG00000075602",
                          "Plaat3"="ENSMUSG00000060675",
                          "Rtp4"="ENSMUSG00000033355",
                          "Serpina3f"="ENSMUSG00000066363",
                          "Stat1"="ENSMUSG00000026104")

genes_of_interest_exp9.4_9.7 <- list(
                          'Ifi44'     ="ENSMUSG00000028037", 
                          'Ccl5'      ="ENSMUSG00000035042",
                          'Iigp1'     ="ENSMUSG00000054072",
                          'Serpina3f' ="ENSMUSG00000066363",
                          'Ly6a'      ="ENSMUSG00000075602",
                          'Plaat3'    ="ENSMUSG00000060675",
                          'Lgals3bp'  ="ENSMUSG00000033880",
                          'Ifi27'     ="ENSMUSG00000064215",
                          'Gbp6'      ="ENSMUSG00000104713",
                          'Rtp4'      ="ENSMUSG00000033355",
                          'Stat1'     ="ENSMUSG00000026104",
                          'Tap1'      ="ENSMUSG00000037321",
                          'H2-K1'     ="ENSMUSG00000061232",
                          'B2m'       ="ENSMUSG00000060802",
                          'Igtp'      ="ENSMUSG00000078853",
                          'Gtf2ird1'  ="ENSMUSG00000023079",
                          'Cdc42ep4'  ="ENSMUSG00000041598",
                          'Hoxb6'     ="ENSMUSG00000000690",
                          'Bfsp2'     ="ENSMUSG00000032556",
                          'Sox6'      ="ENSMUSG00000051910",
                          'Tgm2'      ="ENSMUSG00000037820",
                          'Adprhl1'   ="ENSMUSG00000031448")

# additional parameters ----

if(exists("deg_design")) {rm(deg_design)}
deg_design <- as.formula("~ experiment + cell_line_label + condition_tp")


# # raw and filtered dds
# precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData")
# precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
# 
# # log2 and vsd transformed
# precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData")
# precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
# 
# # check if object loaded if not load
# if(!exists(precomputed_objects_file)){
#   load(precomputed_objects_file)
#   load(precomputed_transf_objects_file)
# } 
# preparing dds and vsd objects for the main project comparison 
# We use only EXP 9.4 - 9.7. EXP 9.8 is excluded.
# Preparing AB subset. 
# defining parameters

deg_name <- "TpNK_tp2_vs_Tonly_tp1_AB"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
dir.create(deg_dir)

precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)

precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)

experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")


# just in case remove any existing subsets
if(exists("dds_subset")) {rm(dds_subset)}
if(exists("dds_subset_filt")) {rm(dds_subset_filt)}
if(exists("vsd_subset")) {rm(vsd_subset)}

if(!file.exists(precomputed_objects_deg_file)){
  cat("RNA objects file '", precomputed_objects_deg_filename, "' does not exist in the OUTPUT_DIR...\n")
  cat("generating", precomputed_objects_deg_filename, "\n")

  dds_subset <- dds_filt[ , dds_filt$condition_tp %in% condition_tp_subset]  
  dds_subset <- dds_subset[ , dds_subset$experiment %in% experiment_subset]
  dds_subset <- dds_subset[ , dds_subset$cell_line_label %in% c("A", "B")]
  
  # fixing Tumor_plus_WT_NK_timepoint_2 -> Tumor_plus_NK_timepoint_2
  dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
  table(dds_subset$condition_tp)
  
  dds_subset$experiment <- droplevels(dds_subset$experiment)
  table(dds_subset$experiment)
  
  dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
  table(dds_subset$cell_line_label)
  
  # filtering lowly expressed genes
  dds_subset_filt <- filterDatasets(dds_subset, 
                                    abs_filt = TRUE, 
                                    abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size

  cat("... dds\n")
  design(dds_subset_filt) <- deg_design
  dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
  dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
  
  cat("saving...", precomputed_objects_file, "\n")
  cat("...into:", output_dir,deg_name,"\n")
  save(dds_subset, dds_subset_filt, ensemblAnnot,  file = precomputed_objects_deg_file)
  
} else{
  cat("RNA objects file '", precomputed_objects_deg_filename, "' exist...loading\n")
  load(precomputed_objects_deg_file)
}
## RNA objects file ' TpNK_tp2_vs_Tonly_tp1_AB_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_deg_file)){
  # transformation
  log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
  vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info; blind = TRUE for QC
  #rld_filt <- DESeq2::rlog(dds_subset_filt, blind = FALSE) # more robust to differences in sequencing depth!
  #rld_filt - too big for >100 samples; skipping!
  #rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
  save(log2_norm_subset_filt, vsd_subset_filt,   
       file = precomputed_transf_objects_deg_file)
} else{
  load(precomputed_transf_objects_deg_file)
}
#get the counts table for diffTFs  AB cell lines
fl_smpl_nm <- gsub("_Day\\d\\d","",dds_subset@colData@listData[["full_sample_name"]])
fl_smpl_nm <- gsub("_\\d_","_",fl_smpl_nm)

sample_name <- paste0(dds_subset@colData@listData[["original_experiment"]], "_",
                      fl_smpl_nm, "_",
                      dds_subset@colData@listData[["day_cell_harvesting"]], "_REP",
                      dds_subset@colData@listData[["technical_replicate"]])

sample_name <- gsub("^EXP\\s9.", "EXP9_", sample_name)
sample_name <- gsub("\\+", "_plus_", sample_name)
sample_name <- gsub("_Tumor_only_Day\\d_", "_Tumor_only_", sample_name)

#These samples from RNA-seq are missing in ATAC data.
#EXP9_6_13G_Tumor_plus_NK_Day14_REP1 is missing in ./
#EXP9_6_13G_Tumor_plus_NK_Day14_REP2 is missing in ./
#EXP9_6_13G_Tumor_plus_NK_Day14_REP3 is missing in ./
#EXP9_6_13H_Tumor_plus_NK_Day14_REP1 is missing in ./
#EXP9_6_13H_Tumor_plus_NK_Day14_REP2 is missing in ./
#EXP9_6_13H_Tumor_plus_NK_Day14_REP3 is missing in ./
#
#Instead there is Day_10 version.
#
#I'm going to replace in the RNA-seq count table the day 14 with day 10, so that samples match.

sample_name <- gsub("EXP9_6_13G_Tumor_plus_NK_Day14", "EXP9_6_13G_Tumor_plus_NK_Day10", sample_name)
sample_name <- gsub("EXP9_6_13H_Tumor_plus_NK_Day14", "EXP9_6_13H_Tumor_plus_NK_Day10", sample_name)
col_IDS <- c("ENSEMBL", sample_name)
RNA_seq_raw_counts <- dds_subset@assays@data@listData[["counts"]]
RNA_seq_raw_counts <- cbind(row.names(RNA_seq_raw_counts), RNA_seq_raw_counts)
RNA_seq_raw_counts <- rbind(col_IDS,RNA_seq_raw_counts)

write.table(RNA_seq_raw_counts, file = "~/workspace/RNA_seq_raw_counts_AB.tsv",
            sep ="\t", quote = FALSE, row.names = FALSE, col.names = FALSE)

# Building the sample table
sampleID_list <- list("sampleID" = sample_name)
bamReads_list <- list("bamReads" = paste0("/home/aleksandr_b/bioinf_isilon/core_bioinformatics_unit/Internal/aleksandr/projects/ab3_20230220_nk_tumor_project/data_ATACSeq_nfcore_run/results/bwa/merged_library/", sample_name, ".mLb.clN.sorted.bam"))
cond_summ <- sample_name
cond_summ[stringr::str_detect(cond_summ, pattern = "_Tumor_only_")] <- "Tumor_only_TP_1"
cond_summ[stringr::str_detect(cond_summ, pattern = "_Tumor_plus_NK")] <- "Tumor_pls_NK_TP2"
conditionSummary_list <- list("conditionSummary" = cond_summ)
experiment_id_list <- list("experiment_id" = stringr::str_extract(sample_name, "EXP\\d_\\d"))

sampleData.tsv <- data.frame(sampleID_list,
                             bamReads_list,
                             conditionSummary_list,
                             experiment_id_list)
write.table(sampleData.tsv, file = "~/workspace/sampleData.tsv",
            sep ="\t", quote = FALSE, row.names = FALSE)
#print(key_variables_tableOne$CatTable)
rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previus runs
key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
  dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate) 

key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)), 
                           strata = c("condition_tp"), 
                           data = key_metadata_subset)

tableone::kableone(key_variables_tableOne$CatTable,
                   caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
  kableExtra::kable_material(c("striped", "hover"))
Overview of number of samples in different categories (experiment, condition,…).
Tumor_only_timepoint_1 Tumor_plus_NK_timepoint_1 Tumor_plus_NK_timepoint_2 p test
n 18 18 18
experiment (%) 1.000
EXP9.5 6 (33.3) 6 (33.3) 6 (33.3)
EXP9.6 6 (33.3) 6 (33.3) 6 (33.3)
EXP9.7 6 (33.3) 6 (33.3) 6 (33.3)
cell_line_label = B (%) 9 (50.0) 9 (50.0) 9 (50.0) 1.000
technical_replicate (%) 1.000
1 6 (33.3) 6 (33.3) 6 (33.3)
2 6 (33.3) 6 (33.3) 6 (33.3)
3 6 (33.3) 6 (33.3) 6 (33.3)
#TODO:
# - plot images after removing batch effects!
# EDA and visualize plots before and after batch effect correction

# transf_object <- vsd_subset_filt
# pca_deg <- generatePCA(transf_object = transf_object, 
#                        cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
#                        color_variable = "condition_tp", 
#                        shape_variable = "experiment",
#                        ntop_genes = 1000) +
#   ggtitle("Original dataset") +
#  geom_text_repel(aes(label = transf_object$sample_name),
#                   box.padding = 3)
# 
# 
# 
# pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object, 
#                        cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
#                        color_variable = "condition_tp", 
#                        shape_variable = "cell_line_label",
#                        ntop_genes = 1000) +
#   ggtitle("Original dataset")
# 
# # Remove batch effect
# transf_batch_NObatch_experiment <- vsd_subset_filt
# transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
# SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
# 
# pca_deg_NObatch_experiment <- generatePCA(transf_object = transf_batch_NObatch_experiment, 
#                        cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
#                        color_variable = "condition_tp", 
#                        shape_variable = "experiment",
#                        ntop_genes = 1000) +
#   ggtitle("Removed batchEffect - experiment")
# 
# pca_deg_NObatch_experiment_plotCellLabel <- generatePCA(transf_object = transf_batch_NObatch_experiment, 
#                        cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
#                        color_variable = "condition_tp", 
#                        shape_variable = "cell_line_label",
#                        ntop_genes = 1000) +
#   ggtitle("Removed batchEffect - experiment")
# 
# #PCA_batch_merged <- ggpubr::ggarrange(pca_deg, pca_deg_NObatch_experiment, common.legend = TRUE)
# 
# 
# PCA_batch_merged <- ggpubr::ggarrange(pca_deg, 
#                                       pca_deg_NObatch_experiment,
#                                       common.legend = TRUE)
# 
# PCA_batch_highlightCellLines_merged <- ggpubr::ggarrange(pca_deg_plotCellLabel, 
#                                       pca_deg_NObatch_experiment_plotCellLabel,
#                                       common.legend = TRUE)


###### Plots for publication #######
transf_object <- vsd_subset_filt

pca_deg <- generatePCA_plus_shape(transf_object = transf_object, 
                       cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"), 
                       color_variable = "condition_tp", 
                       shape_variable = "Cell_line_label_and_exp_id",
                       ntop_genes = 1000)+
  ggtitle("Original dataset") +
  scale_shape_manual(values = c(0,1,2,15,16,17)) +
  scale_color_manual(values = c("#DD3344", "#FF9F1C" ,"#553388"))
pca_deg

# remove batch effect
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count


pca_deg_NObatch_experiment <- generatePCA_plus_shape(transf_object = transf_batch_NObatch_experiment, 
                       cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"), 
                       color_variable = "condition_tp", 
                       shape_variable = "Cell_line_label_and_exp_id",
                       ntop_genes = 1000)+
  ggtitle("Batch Corrected Dataset") +
  scale_shape_manual(values = c(0,1,2,15,16,17)) +
  scale_color_manual(values = c("#DD3344", "#FF9F1C" ,"#553388"))
pca_deg_NObatch_experiment

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_not_batch_corrected.png"), 
       plot=pca_deg,
       width = 20, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_batch_corrected.png"), 
       plot=pca_deg_NObatch_experiment,
       width = 20, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_not_batch_corrected.eps"), 
       plot=pca_deg,
       width = 20, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_batch_corrected.eps"), 
       plot=pca_deg_NObatch_experiment,
       width = 20, height = 14, units = "cm")

The following PCA plots show effect of removing batch effects (experiment). There are two weird samples from EXP9.7 that create a lot of variance. What should i do with them?

print(pca_deg_NObatch_experiment)

print(pca_deg)

#resultsNames(dds_subset_filt)
# timepoint2 (T+NK)_vs_timepoint1 (T)
deg_TpNK_tp2_vs_Tonly_tp1_results <- generateResults_upd(dds_object = dds_subset_filt, 
                                                             coeff_name = "condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1",
                                                             cond_numerator = "Tumor_plus_NK_timepoint_2", 
                                                             cond_denominator = "Tumor_only_timepoint_1",
                                                             cond_variable="condition_tp")

openxlsx::write.xlsx(deg_TpNK_tp2_vs_Tonly_tp1_results, file = file.path(deg_dir, "deg_TpNK_tp2_vs_Tonly_tp1_AB_results.xlsx"))

AB_results <- deg_TpNK_tp2_vs_Tonly_tp1_results
# Loading MsigDB geneset collections ----
gs_hallmark <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("H"), clean=TRUE) 
gs_C2_kegg <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C2"), subcategory = "CP:KEGG", clean=TRUE) 
gs_C5_GOBP <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:BP", clean=TRUE) 
gs_C5_GOCC <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:CC", clean=TRUE) 
gs_C5_GOMF <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:MF", clean=TRUE) 


TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol, 
                                                      genesets = gs_C5_GOBP, 
                                                      test="hypergeometric", 
                                                      background=nrow(dds_subset_filt))
AB_GOBP <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP

TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol, 
                                                      genesets = gs_C5_GOCC, 
                                                      test="hypergeometric", 
                                                      background=nrow(dds_subset_filt))
AB_GOCC <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC

TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol, 
                                                      genesets = gs_C5_GOMF, 
                                                      test="hypergeometric", 
                                                      background=nrow(dds_subset_filt))
AB_GOMF <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF


#hypeR::hyp_show(hyp_obj$data$clA, simple = FALSE)
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_enrichment_C5_GOBP.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_enrichment_C5_GOCC.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_enrichment_C5_GOMF.xlsx"))

# plotting enrichment results ----
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOBP: T+NK timepoint2 vs T-only timepoint1 AB") +theme_bw()
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOCC: T+NK timepoint2 vs T-only timepoint1 AB") +theme_bw()
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOMF:T+NK timepoint2 vs T-only timepoint1 AB") + theme_bw()


###### For publication ######
ggsave(filename = paste0(deg_dir,"publication/" ,"TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
       width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/" ,"TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.eps"), 
       plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
       width = 20, height = 20, units = "cm")

Highlighting genes of interest

Volcano plot highlighting genes of interest:

# volcano plot
# heatmap - corrected
genes_of_interest
## $Denn2b
## [1] "ENSMUSG00000031024"
## 
## $Gbp6
## [1] "ENSMUSG00000104713"
## 
## $`H2-K1`
## [1] "ENSMUSG00000061232"
## 
## $Hs3st2
## [1] "ENSMUSG00000046321"
## 
## $Htra3
## [1] "ENSMUSG00000029096"
## 
## $Ifi27
## [1] "ENSMUSG00000064215"
## 
## $Igtp
## [1] "ENSMUSG00000078853"
## 
## $Irf1
## [1] "ENSMUSG00000018899"
## 
## $Lgals3bp
## [1] "ENSMUSG00000033880"
## 
## $Ly6a
## [1] "ENSMUSG00000075602"
## 
## $Plaat3
## [1] "ENSMUSG00000060675"
## 
## $Rtp4
## [1] "ENSMUSG00000033355"
## 
## $Serpina3f
## [1] "ENSMUSG00000066363"
## 
## $Stat1
## [1] "ENSMUSG00000026104"
genes_of_interest_exp9.4_9.7
## $Ifi44
## [1] "ENSMUSG00000028037"
## 
## $Ccl5
## [1] "ENSMUSG00000035042"
## 
## $Iigp1
## [1] "ENSMUSG00000054072"
## 
## $Serpina3f
## [1] "ENSMUSG00000066363"
## 
## $Ly6a
## [1] "ENSMUSG00000075602"
## 
## $Plaat3
## [1] "ENSMUSG00000060675"
## 
## $Lgals3bp
## [1] "ENSMUSG00000033880"
## 
## $Ifi27
## [1] "ENSMUSG00000064215"
## 
## $Gbp6
## [1] "ENSMUSG00000104713"
## 
## $Rtp4
## [1] "ENSMUSG00000033355"
## 
## $Stat1
## [1] "ENSMUSG00000026104"
## 
## $Tap1
## [1] "ENSMUSG00000037321"
## 
## $`H2-K1`
## [1] "ENSMUSG00000061232"
## 
## $B2m
## [1] "ENSMUSG00000060802"
## 
## $Igtp
## [1] "ENSMUSG00000078853"
## 
## $Gtf2ird1
## [1] "ENSMUSG00000023079"
## 
## $Cdc42ep4
## [1] "ENSMUSG00000041598"
## 
## $Hoxb6
## [1] "ENSMUSG00000000690"
## 
## $Bfsp2
## [1] "ENSMUSG00000032556"
## 
## $Sox6
## [1] "ENSMUSG00000051910"
## 
## $Tgm2
## [1] "ENSMUSG00000037820"
## 
## $Adprhl1
## [1] "ENSMUSG00000031448"
TpNK_tp2_vs_Tonly_tp1_volcano_plot_1 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all, 
                                             genes_of_interest = names(genes_of_interest), 
                                             plot_title = "T+NK tp2 vs T-only tp1 AB genes set 1")

TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all, 
                                             genes_of_interest = names(genes_of_interest_exp9.4_9.7), 
                                             plot_title = "T+NK tp2 vs T-only tp1 AB genes set 2")


##### For publication #####

TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano_repel(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all, 
                                             genes_of_interest = names(genes_of_interest_exp9.4_9.7), 
                                             plot_title = "T+NK tp2 vs T-only tp1 AB")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_volcano_plot.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
       width = 18, height = 12, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_volcano_plot.eps"), 
       plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
       width = 18, height = 12, units = "cm")
print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_1)

print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_2)

Heatmap of normalized and scaled expression of 607 significantly differentially expressed genes before and after removeal of batch effects coming from different experiments:

# heatmap and heatmap batch corrected - corrected 
metadata_heatmap <- as.data.frame(colData(dds_subset))

heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]

annotation_col <- metadata_heatmap %>%
  dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
  dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]

ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
  dplyr::select(ensembl_id, mgi_symbol)

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
  condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C",  Tumor_plus_NK_timepoint_2 = "#553388"),
  cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)

heatmap_not_corrected <- pheatmap::pheatmap(heatmap_counts_deg_ord,
                   main = "Heatmap of signif. DEG",
                   scale = "row",
                   annotation_col = annotation_col,
                   annotation_colors = ann_colors,
                   #annotation_row = row_annot_symbols,
                   show_colnames = FALSE,
                   show_rownames = FALSE,
                   cluster_cols = FALSE,
                   #cluster_rows = counts_deg_ord_row_cor_hclust,
                   color = color.scheme,
                   fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
                   ) # 

ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_not_corrected.png"), 
       plot=heatmap_not_corrected,
       width = 20, height = 20, units = "cm")
# heatmap and heatmap batch corrected - corrected 
metadata_heatmap <- as.data.frame(colData(dds_subset))

heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]

annotation_col <- metadata_heatmap %>%
  dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
  dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]

ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
  dplyr::select(ensembl_id, mgi_symbol)

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
  condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C",  Tumor_plus_NK_timepoint_2 = "#553388"),
  cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)

heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                   main = "Heatmap of signif. DEG - Batch Corrected",
                   scale = "row",
                   annotation_col = annotation_col,
                   annotation_colors = ann_colors,
                   #annotation_row = row_annot_symbols,
                   show_colnames = FALSE,
                   show_rownames = FALSE,
                   cluster_cols = FALSE,
                   #cluster_rows = counts_deg_ord_row_cor_hclust,
                   color = color.scheme,
                   fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
                   ) # 

ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected.png"), 
       plot=heatmap_corrected,
       width = 20, height = 20, units = "cm")


##### For publication #####
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected.eps"), 
       plot=heatmap_corrected,
       width = 18, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected.png"), 
       plot=heatmap_corrected,
       width = 18, height = 20, units = "cm")
# identify the 25 gene set
minL2FC_Ids <- order(deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
maxL2FC_Ids <- order(decreasing = T, deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
L2FC_EMSEMBL <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id[c(minL2FC_Ids, maxL2FC_Ids)]
L2FC_EMSEMBL <- c(L2FC_EMSEMBL, "ENSMUSG00000075602", "ENSMUSG00000060675")

heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% L2FC_EMSEMBL, ]

annotation_col <- metadata_heatmap %>%
  dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
  dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]

ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
  dplyr::select(ensembl_id, mgi_symbol)

rownames(heatmap_counts_NObatch_deg_ord) <- ensembl2symbol_annot[match(x = row.names(heatmap_counts_NObatch_deg_ord), table = ensembl2symbol_annot$ensembl_id),]$mgi_symbol

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
  condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C",  Tumor_plus_NK_timepoint_2 = "#553388"),
  cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)


heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                   main = "Heatmap of signif. DEG - Batch Corrected 50 most regulated genes",
                   scale = "row",
                   annotation_col = annotation_col,
                   annotation_colors = ann_colors,
                   #annotation_row = row_annot_symbols,
                   show_colnames = FALSE,
                   show_rownames = T,
                   cluster_cols = FALSE,
                   #cluster_rows = counts_deg_ord_row_cor_hclust,
                   color = color.scheme,
                   fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
                   ) # 

<img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 AB”small heatmap”-1.png” width=“672” />

##### For publication #####
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected_50genes.eps"), 
       plot=heatmap_corrected,
       width = 24, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected_50_genes.png"), 
       plot=heatmap_corrected,
       width = 24, height = 20, units = "cm")

Now - process the CD subset. Later I will check the overlap between these two.

# preparing dds and vsd objects for the main project comparison 
# We use only EXP 9.4 - 9.7. EXP 9.8 is excluded.
# Preparing CD subset. 
# defining parameters

deg_name <- "TpNK_tp2_vs_Tonly_tp1_CD"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
dir.create(deg_dir)

precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)

precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)

experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")


# just in case remove any existing subsets
if(exists("dds_subset")) {rm(dds_subset)}
if(exists("dds_subset_filt")) {rm(dds_subset_filt)}
if(exists("vsd_subset")) {rm(vsd_subset)}

if(!file.exists(precomputed_objects_deg_file)){
  cat("RNA objects file '", precomputed_objects_deg_filename, "' does not exist in the OUTPUT_DIR...\n")
  cat("generating", precomputed_objects_deg_filename, "\n")

  dds_subset <- dds_filt[ , dds_filt$condition_tp %in% condition_tp_subset]  
  dds_subset <- dds_subset[ , dds_subset$experiment %in% experiment_subset]
  dds_subset <- dds_subset[ , dds_subset$cell_line_label %in% c("C", "D")]
  
  # fixing Tumor_plus_WT_NK_timepoint_2 -> Tumor_plus_NK_timepoint_2
  dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
  table(dds_subset$condition_tp)
  
  dds_subset$experiment <- droplevels(dds_subset$experiment)
  table(dds_subset$experiment)
  
  dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
  table(dds_subset$cell_line_label)
  
  # filtering lowly expressed genes
 
  dds_subset_filt <- filterDatasets(dds_subset, 
                                    abs_filt = TRUE, 
                                    abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size

  cat("... dds\n")
  design(dds_subset_filt) <- deg_design
  dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
  dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
  
  cat("saving...", precomputed_objects_file, "\n")
  cat("...into:", output_dir, "\n")
  save(dds_subset, dds_subset_filt, ensemblAnnot,  file = precomputed_objects_deg_file)
  
} else{
  cat("RNA objects file '", precomputed_objects_deg_filename, "' exist...loading\n")
  load(precomputed_objects_deg_file)
}
## RNA objects file ' TpNK_tp2_vs_Tonly_tp1_CD_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_deg_file)){
  # transformation
  log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
  vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info; blind = TRUE for QC
  #rld_filt <- DESeq2::rlog(dds_subset_filt, blind = FALSE) # more robust to differences in sequencing depth!
  #rld_filt - too big for >100 samples; skipping!
  #rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
  save(log2_norm_subset_filt, vsd_subset_filt,   
       file = precomputed_transf_objects_deg_file)
} else{
  load(precomputed_transf_objects_deg_file)
}
#print(key_variables_tableOne$CatTable)
rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previus runs
key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
  dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate) 

key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)), 
                           strata = c("condition_tp"), 
                           data = key_metadata_subset)

tableone::kableone(key_variables_tableOne$CatTable,
                   caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
  kableExtra::kable_material(c("striped", "hover"))
Overview of number of samples in different categories (experiment, condition,…).
Tumor_only_timepoint_1 Tumor_plus_NK_timepoint_1 Tumor_plus_NK_timepoint_2 p test
n 24 24 23
experiment (%) 1.000
EXP9.4 6 (25.0) 6 (25.0) 6 (26.1)
EXP9.5 6 (25.0) 6 (25.0) 6 (26.1)
EXP9.6 6 (25.0) 6 (25.0) 5 (21.7)
EXP9.7 6 (25.0) 6 (25.0) 6 (26.1)
cell_line_label = D (%) 12 (50.0) 12 (50.0) 11 (47.8) 0.985
technical_replicate (%) 1.000
1 8 (33.3) 8 (33.3) 8 (34.8)
2 8 (33.3) 8 (33.3) 8 (34.8)
3 8 (33.3) 8 (33.3) 7 (30.4)
#TODO:
# - plot images after removing batch effects!
# EDA and visualize plots before and after batch effect correction
transf_object <- vsd_subset_filt
pca_deg <- generatePCA(transf_object = transf_object, 
                       cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
                       color_variable = "condition_tp", 
                       shape_variable = "experiment",
                       ntop_genes = 1000) +
  ggtitle("Original dataset")

pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object, 
                       cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
                       color_variable = "condition_tp", 
                       shape_variable = "cell_line_label",
                       ntop_genes = 1000) +
  ggtitle("Original dataset")

# Remove batch effect
# remove batch effect
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count

pca_deg_NObatch_experiment <- generatePCA(transf_object = transf_batch_NObatch_experiment, 
                       cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
                       color_variable = "condition_tp", 
                       shape_variable = "experiment",
                       ntop_genes = 1000) +
  ggtitle("Removed batchEffect - experiment")

pca_deg_NObatch_experiment_plotCellLabel <- generatePCA(transf_object = transf_batch_NObatch_experiment, 
                       cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
                       color_variable = "condition_tp", 
                       shape_variable = "cell_line_label",
                       ntop_genes = 1000) +
  ggtitle("Removed batchEffect - experiment")

#PCA_batch_merged <- ggpubr::ggarrange(pca_deg, pca_deg_NObatch_experiment, common.legend = TRUE)


PCA_batch_merged <- ggpubr::ggarrange(pca_deg, 
                                      pca_deg_NObatch_experiment,
                                      common.legend = TRUE)

PCA_batch_highlightCellLines_merged <- ggpubr::ggarrange(pca_deg_plotCellLabel, 
                                      pca_deg_NObatch_experiment_plotCellLabel,
                                      common.legend = TRUE)


#### for publication #####
transf_object <- vsd_subset_filt
pca_deg <- generatePCA_plus_shape(transf_object = transf_object, 
                       cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"), 
                       color_variable = "condition_tp", 
                       shape_variable = "Cell_line_label_and_exp_id",
                       ntop_genes = 1000)+
  ggtitle("Original dataset") +
  scale_shape_manual(values = c(0,1,2,5,15,16,17,18)) +
  scale_color_manual(values = c("#DD3344", "#FF9F1C", "#553388"))
pca_deg

# remove batch effect
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count


pca_deg_NObatch_experiment <- generatePCA_plus_shape(transf_object = transf_batch_NObatch_experiment, 
                       cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"), 
                       color_variable = "condition_tp", 
                       shape_variable = "Cell_line_label_and_exp_id",
                       ntop_genes = 1000)+
  ggtitle("Batch Corrected Dataset") +
  scale_shape_manual(values = c(0,1,2,5,15,16,17,18)) +
  scale_color_manual(values = c("#DD3344", "#FF9F1C", "#553388"))
pca_deg_NObatch_experiment

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_not_batch_corrected.png"), 
       plot=pca_deg,
       width = 20, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_batch_corrected.png"), 
       plot=pca_deg_NObatch_experiment,
       width = 20, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_not_batch_corrected.eps"), 
       plot=pca_deg,
       width = 20, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_batch_corrected.eps"), 
       plot=pca_deg_NObatch_experiment,
       width = 20, height = 14, units = "cm")
#resultsNames(dds_subset_filt)

# timepoint2 (T+NK)_vs_timepoint1 (T)
deg_TpNK_tp2_vs_Tonly_tp1_results <- generateResults_upd(dds_object = dds_subset_filt, 
                                                             coeff_name = "condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1",
                                                             cond_numerator = "Tumor_plus_NK_timepoint_2", 
                                                             cond_denominator = "Tumor_only_timepoint_1",
                                                             cond_variable="condition_tp")
CD_results <- deg_TpNK_tp2_vs_Tonly_tp1_results
openxlsx::write.xlsx(deg_TpNK_tp2_vs_Tonly_tp1_results, file = file.path(deg_dir, "deg_TpNK_tp2_vs_Tonly_tp1_CD_results.xlsx"))
#msigdbr::msigdbr_species()
#packageVersion("msigdbr")
#msigdbr::msigdbr_collections()

# Loading MsigDB geneset collections ----
gs_hallmark <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("H"), clean=TRUE) 
gs_C2_kegg <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C2"), subcategory = "CP:KEGG", clean=TRUE) 
gs_C5_GOBP <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:BP", clean=TRUE) 
gs_C5_GOCC <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:CC", clean=TRUE) 
gs_C5_GOMF <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:MF", clean=TRUE) 

TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol, 
                                                      genesets = gs_C5_GOBP, 
                                                      test="hypergeometric", 
                                                      background=nrow(dds_subset_filt))
CD_GOBP <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP

TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol, 
                                                      genesets = gs_C5_GOCC, 
                                                      test="hypergeometric", 
                                                      background=nrow(dds_subset_filt))
CD_GOCC <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC

TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol, 
                                                      genesets = gs_C5_GOMF, 
                                                      test="hypergeometric", 
                                                      background=nrow(dds_subset_filt))
CD_GOMF <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF

#hypeR::hyp_show(hyp_obj$data$clA, simple = FALSE)
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_enrichment_C5_GOBP.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_enrichment_C5_GOCC.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_enrichment_C5_GOMF.xlsx"))

# plotting enrichment results ----
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOBP: T+NK timepoint2 vs T-only timepoint1 CD")+theme_bw()

TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOCC: T+NK timepoint2 vs T-only timepoint1 CD")+theme_bw()

TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOMF:T+NK timepoint2 vs T-only timepoint1 CD")+theme_bw()

ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
       width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOCC_plot.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC_plot,
       width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOMF_plot.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF_plot,
       width = 20, height = 20, units = "cm")


###### For publication ######

ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
       width = 20, height = 20, units = "cm")

ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.eps"), 
       plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
       width = 20, height = 20, units = "cm")

Highlighting genes of interest

Volcano plot highlighting genes of interest:

# volcano plot
# heatmap - corrected
genes_of_interest
## $Denn2b
## [1] "ENSMUSG00000031024"
## 
## $Gbp6
## [1] "ENSMUSG00000104713"
## 
## $`H2-K1`
## [1] "ENSMUSG00000061232"
## 
## $Hs3st2
## [1] "ENSMUSG00000046321"
## 
## $Htra3
## [1] "ENSMUSG00000029096"
## 
## $Ifi27
## [1] "ENSMUSG00000064215"
## 
## $Igtp
## [1] "ENSMUSG00000078853"
## 
## $Irf1
## [1] "ENSMUSG00000018899"
## 
## $Lgals3bp
## [1] "ENSMUSG00000033880"
## 
## $Ly6a
## [1] "ENSMUSG00000075602"
## 
## $Plaat3
## [1] "ENSMUSG00000060675"
## 
## $Rtp4
## [1] "ENSMUSG00000033355"
## 
## $Serpina3f
## [1] "ENSMUSG00000066363"
## 
## $Stat1
## [1] "ENSMUSG00000026104"
genes_of_interest_exp9.4_9.7
## $Ifi44
## [1] "ENSMUSG00000028037"
## 
## $Ccl5
## [1] "ENSMUSG00000035042"
## 
## $Iigp1
## [1] "ENSMUSG00000054072"
## 
## $Serpina3f
## [1] "ENSMUSG00000066363"
## 
## $Ly6a
## [1] "ENSMUSG00000075602"
## 
## $Plaat3
## [1] "ENSMUSG00000060675"
## 
## $Lgals3bp
## [1] "ENSMUSG00000033880"
## 
## $Ifi27
## [1] "ENSMUSG00000064215"
## 
## $Gbp6
## [1] "ENSMUSG00000104713"
## 
## $Rtp4
## [1] "ENSMUSG00000033355"
## 
## $Stat1
## [1] "ENSMUSG00000026104"
## 
## $Tap1
## [1] "ENSMUSG00000037321"
## 
## $`H2-K1`
## [1] "ENSMUSG00000061232"
## 
## $B2m
## [1] "ENSMUSG00000060802"
## 
## $Igtp
## [1] "ENSMUSG00000078853"
## 
## $Gtf2ird1
## [1] "ENSMUSG00000023079"
## 
## $Cdc42ep4
## [1] "ENSMUSG00000041598"
## 
## $Hoxb6
## [1] "ENSMUSG00000000690"
## 
## $Bfsp2
## [1] "ENSMUSG00000032556"
## 
## $Sox6
## [1] "ENSMUSG00000051910"
## 
## $Tgm2
## [1] "ENSMUSG00000037820"
## 
## $Adprhl1
## [1] "ENSMUSG00000031448"
TpNK_tp2_vs_Tonly_tp1_volcano_plot_1 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all, 
                                             genes_of_interest = names(genes_of_interest), 
                                             plot_title = "T+NK tp2 vs T-only tp1 CD genes set 1")

TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all, 
                                             genes_of_interest = names(genes_of_interest_exp9.4_9.7), 
                                             plot_title = "T+NK tp2 vs T-only tp1 CD genes set 2")


ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot_gene_set_1.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_1,
       width = 20, height = 20, units = "cm")

ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot_gene_set_2.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
       width = 20, height = 20, units = "cm")


##### for publication ######

TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano_repel(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all, 
                                             genes_of_interest = names(genes_of_interest_exp9.4_9.7), 
                                             plot_title = "T+NK tp2 vs T-only tp1 CD")

ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot.png"), 
       plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
       width = 18, height = 12, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot.eps"), 
       plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
       width = 18, height = 12, units = "cm")
print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_1)

print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_2)

Heatmap of normalized and scaled expression of 321 significantly differentially expressed genes before and after removeal of batch effects coming from different experiments:

# heatmap and heatmap batch corrected - corrected 
metadata_heatmap <- as.data.frame(colData(dds_subset))

heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]

annotation_col <- metadata_heatmap %>%
  dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
  dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]

ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
  dplyr::select(ensembl_id, mgi_symbol)

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
  condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
  cell_line_label = c(C = "#E9D8A6", D = "#D9BE6D")
)

heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                   main = "Heatmap of signif. DEG",
                   scale = "row",
                   annotation_col = annotation_col,
                   annotation_colors = ann_colors,
                   #annotation_row = row_annot_symbols,
                   show_colnames = FALSE,
                   show_rownames = FALSE,
                   cluster_cols = FALSE,
                   #cluster_rows = counts_deg_ord_row_cor_hclust,
                   color = color.scheme,
                   fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
                   ) # 

ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_batch_corrected.png"), 
       plot=heatmap_corrected,
       width = 20, height = 20, units = "cm")

###### For publication #####

ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_bathc_corrected.png"), 
       plot=heatmap_corrected,
       width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_batch_corrected.eps"), 
       plot=heatmap_corrected,
       width = 20, height = 20, units = "cm")
# identify the 25 gene set
minL2FC_Ids <- order(deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
maxL2FC_Ids <- order(decreasing = T, deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
L2FC_EMSEMBL <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id[c(minL2FC_Ids, maxL2FC_Ids)]
L2FC_EMSEMBL <- c(L2FC_EMSEMBL, "ENSMUSG00000075602", "ENSMUSG00000060675")

heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% L2FC_EMSEMBL, ]

annotation_col <- metadata_heatmap %>%
  dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
  dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]

ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
  dplyr::select(ensembl_id, mgi_symbol)

rownames(heatmap_counts_NObatch_deg_ord) <- ensembl2symbol_annot[match(x = row.names(heatmap_counts_NObatch_deg_ord), table = ensembl2symbol_annot$ensembl_id),]$mgi_symbol

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
  condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
  cell_line_label = c(C = "#E9D8A6", D = "#D9BE6D")
)


heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                   main = "Heatmap of signif. DEG - Batch Corrected 50 most regulated genes",
                   scale = "row",
                   annotation_col = annotation_col,
                   annotation_colors = ann_colors,
                   #annotation_row = row_annot_symbols,
                   show_colnames = FALSE,
                   show_rownames = T,
                   cluster_cols = FALSE,
                   #cluster_rows = counts_deg_ord_row_cor_hclust,
                   color = color.scheme,
                   fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
                   ) # 

<img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 CD”small heatmap”-1.png” width=“672” />

##### For publication #####
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_corrected_50_genes.eps"), 
       plot=heatmap_corrected,
       width = 24, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_corrected_50_genes.png"), 
       plot=heatmap_corrected,
       width = 24, height = 20, units = "cm")
# heatmap and heatmap batch corrected - corrected 
metadata_heatmap <- as.data.frame(colData(dds_subset))

heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]

annotation_col <- metadata_heatmap %>%
  dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
  dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]

ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
  dplyr::select(ensembl_id, mgi_symbol)

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c(EXP9.4 = "#4682B4", EXP9.5 = "#7846B4", EXP9.6 = "#B47846", EXP9.7 = "#82B446", EXP9.8 = "grey")
)

heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                   main = "Heatmap of signif. DEG - removed experiment BatchEffect",
                   scale = "row",
                   annotation_col = annotation_col,
                   annotation_colors = ann_colors,
                   #annotation_row = row_annot_symbols,
                   show_colnames = FALSE,
                   show_rownames = FALSE,
                   cluster_cols = FALSE,
                   #cluster_rows = counts_deg_ord_row_cor_hclust,
                   color = color.scheme,
                   fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
                   ) # 

ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_corrected.png"), 
       plot=heatmap_corrected,
       width = 20, height = 20, units = "cm")
AB_CD_UP_venn <- list("AB_up" = AB_results$results_signif[AB_results$results_signif$log2FoldChange > 0,]$mgi_symbol,
                   "CD_up" = CD_results$results_signif[CD_results$results_signif$log2FoldChange > 0,]$mgi_symbol)

AB_CD_DOWN_venn <- list("AB_down" = AB_results$results_signif[AB_results$results_signif$log2FoldChange < 0,]$mgi_symbol,
                   "CD_down" = CD_results$results_signif[CD_results$results_signif$log2FoldChange < 0,]$mgi_symbol)

library(ggvenn)
AB_CD_UP_venn_plot <- ggvenn(
  AB_CD_UP_venn, 
  fill_color = c("#E0E2E3", "#8EA7CE"),
  stroke_size = 0.5, set_name_size = 4
  )


AB_CD_DOWN_venn_plot <- ggvenn(
  AB_CD_DOWN_venn, 
  fill_color = c("#E0E2E3", "#8EA7CE"),
  stroke_size = 0.5, set_name_size = 4
  )


deg_name <- "TpNK_tp2_AB_CD_comparison"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))

intersect(AB_results$results_signif[AB_results$results_signif$log2FoldChange < 0,]$mgi_symbol,
          CD_results$results_signif[CD_results$results_signif$log2FoldChange < 0,]$mgi_symbol)
##  [1] "Bfsp2"    "Hspb8"    "Ckap4"    "Denn2b"   "Hs3st2"   "Mgll"     "Cdc42ep4" "Egfl7"   
##  [9] "Akr1c13"  "Apbb1"    "Pygl"     "Tcf7l1"   "Gria2"    "Tmem176a" "Golm1"    "Ntrk3"   
## [17] "Tmem255a" "Tmem38b"  "Gm36120"  "Arid5b"   "n-R5s118" "H2ac15"   "Ramp2"    "Tmed6"   
## [25] "Rgs7bp"   "Ccdc120"  "Slc47a1"
ggsave(filename = paste0(deg_dir, "AB_CD_UP_venn_plot.png"), 
       plot=AB_CD_UP_venn_plot,
       width = 20, height = 20, units = "cm")

ggsave(filename = paste0(deg_dir, "AB_CD_UP_venn_plot.eps"), 
       plot=AB_CD_UP_venn_plot,
       width = 20, height = 20, units = "cm")

ggsave(filename = paste0(deg_dir, "AB_CD_DOWN_venn_plot.png"), 
       plot=AB_CD_DOWN_venn_plot,
       width = 20, height = 20, units = "cm")

ggsave(filename = paste0(deg_dir, "AB_CD_DOWN_venn_plot.eps"), 
       plot=AB_CD_DOWN_venn_plot,
       width = 20, height = 20, units = "cm")


#aggreagated excel sheet for 
Deg_AB_xlsx <- openxlsx::read.xlsx("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_AB/deg_TpNK_tp2_vs_Tonly_tp1_AB_results.xlsx",sheet = 'results_signif')
Deg_CD_xlsx <- openxlsx::read.xlsx("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_CD/deg_TpNK_tp2_vs_Tonly_tp1_CD_results.xlsx",sheet = 'results_signif')


AB_specific <- Deg_AB_xlsx[Deg_AB_xlsx$mgi_symbol %in% setdiff(Deg_AB_xlsx$mgi_symbol, Deg_CD_xlsx$mgi_symbol),]%>%
  select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj")
CD_specific <- Deg_CD_xlsx[Deg_CD_xlsx$mgi_symbol %in% setdiff(Deg_CD_xlsx$mgi_symbol, Deg_AB_xlsx$mgi_symbol),]%>%
  select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj")

AB_intersect_tmp <-  Deg_AB_xlsx[which(Deg_AB_xlsx$mgi_symbol %in% intersect(Deg_AB_xlsx$mgi_symbol, Deg_CD_xlsx$mgi_symbol)), ] %>% 
  select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj") %>% 
  rename("AB_log2FoldChange" = "log2FoldChange", "AB_padj" = "padj")

CD_intersect_tmp <-  Deg_CD_xlsx[which(Deg_CD_xlsx$mgi_symbol %in% intersect(Deg_AB_xlsx$mgi_symbol, Deg_CD_xlsx$mgi_symbol)), ] %>% 
  select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj") %>% 
  rename("CD_log2FoldChange" = "log2FoldChange", "CD_padj" = "padj")

CD_intersect_tmp <- CD_intersect_tmp[match(AB_intersect_tmp$mgi_symbol, CD_intersect_tmp$mgi_symbol),] 

AB_CD_intersect <- cbind(AB_intersect_tmp, select(CD_intersect_tmp, "CD_log2FoldChange", "CD_padj"))

AB_CD_ABCD_diff_intersect <- list("AB_specific" = AB_specific,
                                  "CD_specific" = CD_specific,
                                  "AB_CD_intersect" = AB_CD_intersect)
openxlsx::write.xlsx(AB_CD_ABCD_diff_intersect, 
                     "~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_AB_CD_comparison/AB_CD_diff_and_interactions.xlsx")

Make a small heatmap with genes, overlaping from AB and CD.

deg_name <- "TpNK_tp2_AB_CD_comparison"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
dir.create(deg_dir)

precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)

precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)

experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")

AB_CD_intersect_genes <- intersect(AB_results$results_signif$mgi_symbol, CD_results$results_signif$mgi_symbol)

AB_res_subset <- AB_results$results_signif[AB_results$results_signif$mgi_symbol %in% AB_CD_intersect_genes,]
#CD_res_subset <- CD_results$results_signif[CD_results$results_signif$mgi_symbol %in% AB_CD_intersect_genes,]

AB_res_subset_UP <- AB_res_subset[AB_res_subset$log2FoldChange > 0,]
AB_res_subset_DOWN <- AB_res_subset[AB_res_subset$log2FoldChange < 0,]

#AB_res_subset_UP <- AB_res_subset_UP %>% arrange(desc(log2FoldChange)) %>% slice_head(n=20)
#AB_res_subset_DOWN <- AB_res_subset_DOWN %>% arrange(log2FoldChange) %>% slice_head(n=10)

#OR padj
AB_res_subset_UP <- AB_res_subset_UP %>% arrange(padj) %>% slice_head( n = 20)
AB_res_subset_DOWN <- AB_res_subset_DOWN %>% arrange(desc(padj)) %>% slice_head(n=10)

geneset <- c(AB_res_subset_UP$mgi_symbol, AB_res_subset_DOWN$mgi_symbol)


deg_name <- "TpNK_tp2_vs_Tonly_tp1_AB"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))

precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)

precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)

load(precomputed_objects_deg_file)
load(precomputed_transf_objects_deg_file)

transf_object <- vsd_subset_filt
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count

metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]

annotation_col <- metadata_heatmap %>%
    dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
    dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]


ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
    dplyr::select(ensembl_id, mgi_symbol)

ensembl_gene_set <- ensembl2symbol_annot[which(ensembl2symbol_annot$mgi_symbol %in% geneset),]
ensembl_gene_set <- ensembl_gene_set[ match(geneset , ensembl_gene_set$mgi_symbol),]


indexes <- which(row.names(heatmap_counts_NObatch_deg_ord) %in% ensembl_gene_set$ensembl_id)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[indexes,]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[ match(ensembl_gene_set$ensembl_id , row.names(heatmap_counts_NObatch_deg_ord)),]


rownames(heatmap_counts_NObatch_deg_ord) <- ensembl_gene_set$mgi_symbol

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
    experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
    condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C",  Tumor_plus_NK_timepoint_2 = "#553388"),
    cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)

heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                                        main = "Heatmap of signif. DEG - Batch Corrected",
                                        scale = "row",
                                        annotation_col = annotation_col,
                                        annotation_colors = ann_colors,
                                        #annotation_row = row_annot_symbols,
                                        show_colnames = FALSE,
                                        show_rownames = TRUE,
                                        cluster_cols = FALSE,
                                        cluster_rows = FALSE,
                                        color = color.scheme,
                                        fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) 

<img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 AB_CD”heatmap - overlap of genes from AB and CD”-1.png” width=“672” />

ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.eps"), 
       plot=heatmap_corrected,
       width = 30, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.png"), 
       plot=heatmap_corrected,
       width = 30, height = 14, units = "cm")



##### The same but for CD #####

deg_name <- "TpNK_tp2_vs_Tonly_tp1_CD"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))

precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)

precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)

load(precomputed_objects_deg_file)
load(precomputed_transf_objects_deg_file)

transf_object <- vsd_subset_filt
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count

metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]

annotation_col <- metadata_heatmap %>%
    dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
    dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]


ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
    dplyr::select(ensembl_id, mgi_symbol)

ensembl_gene_set <- ensembl2symbol_annot[which(ensembl2symbol_annot$mgi_symbol %in% geneset),]
ensembl_gene_set <- ensembl_gene_set[ match(geneset , ensembl_gene_set$mgi_symbol),]


indexes <- which(row.names(heatmap_counts_NObatch_deg_ord) %in% ensembl_gene_set$ensembl_id)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[indexes,]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[ match(ensembl_gene_set$ensembl_id , row.names(heatmap_counts_NObatch_deg_ord)),]


rownames(heatmap_counts_NObatch_deg_ord) <- ensembl_gene_set$mgi_symbol

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
  condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
  cell_line_label = c(C = "#E9D8A6", D = "#D9BE6D")
)


heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                                        main = "Heatmap of signif. DEG - Batch Corrected",
                                        scale = "row",
                                        annotation_col = annotation_col,
                                        annotation_colors = ann_colors,
                                        #annotation_row = row_annot_symbols,
                                        show_colnames = FALSE,
                                        show_rownames = TRUE,
                                        cluster_cols = FALSE,
                                        cluster_rows = FALSE,
                                        color = color.scheme,
                                        
                                        fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) 

<img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 AB_CD”heatmap - overlap of genes from AB and CD”-2.png” width=“672” />

ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.eps"), 
       plot=heatmap_corrected,
       width = 30, height = 14, units = "cm")

ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.png"), 
       plot=heatmap_corrected,
       width = 30, height = 14, units = "cm")
#First, I need to load full Dataset:

experiment_name="nk_tum_immunoedit_complete"
output_dir <- "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/" 
precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData")
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)

# log2 and vsd transformed
precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)

# check if object loaded if not load
if(!exists(precomputed_objects_file)){
    load(precomputed_objects_file)
    load(precomputed_transf_objects_file)
} 
experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")
condition_tp_subset <- c("Tumor_only_timepoint_1",
                         "Tumor_plus_NK_timepoint_2",
                         "Tumor_plus_NK_timepoint_1",
                         "Tumor_plus_WT_NK_timepoint_2")
abs_filt_samples=3
deg_design <- as.formula("~ experiment + cell_line_label + condition_tp")


dds_subset <- dds_filt[ , dds_filt$condition_tp %in% condition_tp_subset]  
dds_subset <- dds_subset[ , dds_subset$experiment %in% experiment_subset]
dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
table(dds_subset$condition_tp)
## 
##    Tumor_only_timepoint_1 Tumor_plus_NK_timepoint_1 Tumor_plus_NK_timepoint_2 
##                        42                        42                        41
dds_subset$experiment <- droplevels(dds_subset$experiment)
table(dds_subset$experiment)
## 
## EXP9.4 EXP9.5 EXP9.6 EXP9.7 
##     18     36     35     36
dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
table(dds_subset$cell_line_label)
## 
##  A  B  C  D 
## 27 27 36 35
dds_subset_filt <- filterDatasets(dds_subset, 
                                  abs_filt = TRUE, 
                                  abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
## Original dds object samples:  125  genes:  15430 
## Minimum number of samples with expression: 3 
## Number of filtered genes: 433 
## Filtered dds object has samples: 125 genes: 14997
cat("... dds\n")
## ... dds
design(dds_subset_filt) <- deg_design
dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) 
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
deg_TpNK_tp2_vs_Tonly_tp1_results <- generateResults_upd(dds_object = dds_subset_filt, 
                                                         coeff_name = "condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1",
                                                         cond_numerator = "Tumor_plus_NK_timepoint_2", 
                                                         cond_denominator = "Tumor_only_timepoint_1",
                                                         cond_variable="condition_tp")
 deg_TpNK_tp2_vs_Tonly_tp1_results
## $results_signif
##            ensembl_id mgi_symbol                                         mgi_description entrezgene
## 1  ENSMUSG00000075602       Ly6a                   lymphocyte antigen 6 complex, locus A     110454
## 2  ENSMUSG00000060675     Plaat3                   phospholipase A and acyltransferase 3     225845
## 3  ENSMUSG00000033880   Lgals3bp lectin, galactoside-binding, soluble, 3 binding protein      19039
## 4  ENSMUSG00000064215      Ifi27                  interferon, alpha-inducible protein 27      52668
## 5  ENSMUSG00000104713       Gbp6                             guanylate binding protein 6     100702
## 6  ENSMUSG00000033355       Rtp4                          receptor transporter protein 4      67775
## 7  ENSMUSG00000026104      Stat1      signal transducer and activator of transcription 1      20846
## 8  ENSMUSG00000054072      Iigp1                           interferon inducible GTPase 1      60440
## 9  ENSMUSG00000061232      H2-K1                      histocompatibility 2, K1, K region      14972
## 10 ENSMUSG00000073555     Gm4951                                     predicted gene 4951     240327
##      baseMean MeanExpr_Tumor_only_timepoint_1 MeanExpr_Tumor_plus_NK_timepoint_2 log2FoldChange
## 1  2456.40621                      317.387691                         3080.57543      3.2258807
## 2   242.32525                       61.223411                          324.05583      2.3592201
## 3    96.84177                       11.627393                          119.52526      3.3142561
## 4   364.12250                      153.624891                          426.71025      1.4770706
## 5   128.50281                        6.047601                          135.68641      4.1428494
## 6    39.88500                        3.053751                           46.64006      3.7056800
## 7   342.79939                       79.385613                          393.15858      1.9873128
## 8  1112.75679                       42.307708                         1175.32669      4.3614229
## 9  4893.62031                     3035.265875                         5424.20556      0.8243463
## 10   95.71447                       10.450136                          109.08363      2.9734537
##         lfcSE FoldChange        pvalue          padj   gene_biotype chromosome_name start_position
## 1  0.10740032   9.355928 9.066006e-206 1.331071e-201 protein_coding              15       74994877
## 2  0.09136706   5.130929 2.798105e-150 2.054089e-146 protein_coding              19        7557459
## 3  0.15494094   9.946963 1.049868e-103 5.138053e-100 protein_coding              11      118392751
## 4  0.06968169   2.783829 6.176427e-102  2.267058e-98 protein_coding              12      103434211
## 5  0.23046748  17.665338  3.423172e-77  1.005180e-73 protein_coding               5      105270702
## 6  0.21981886  13.047305  6.073340e-65  1.486146e-61 protein_coding              16       23520291
## 7  0.12601002   3.964978  5.014857e-61  1.051830e-57 protein_coding               1       52119440
## 8  0.29663413  20.555077  5.585136e-58  1.025012e-54 protein_coding              18       60376027
## 9  0.05356110   1.770732  6.395728e-55  1.043356e-51 protein_coding              17       33996017
## 10 0.20013176   7.854142  1.474542e-54  2.164923e-51 protein_coding              18       60212080
##    end_position strand Tumor_only_timepoint_1-S_4_R0108 Tumor_only_timepoint_1-S_5_R0108
## 1      74998031     -1                      226.3811564                       341.817725
## 2       7588545      1                       28.0879052                        43.238856
## 3     118402092     -1                        1.7388754                        14.990335
## 4     103440239      1                       98.6588702                       122.576546
## 5     105293698     -1                        1.7271571                        14.889315
## 6      23614222      1                        0.8639205                         5.319718
## 7      52161865      1                       74.6331721                        79.944903
## 8      60392634      1                       29.1684966                        69.538617
## 9      34000347     -1                     2565.3386318                      2781.017989
## 10     60247820      1                        7.8307557                        15.001477
##    Tumor_only_timepoint_1-S_6_R0108 Tumor_only_timepoint_1-S_7_R0108
## 1                        288.630468                       148.275585
## 2                         39.019215                        34.333732
## 3                          6.625686                        13.855399
## 4                        100.911393                       160.638370
## 5                          3.290518                         5.004374
## 6                          0.000000                         2.503178
## 7                         61.756281                        70.215071
## 8                         59.599173                        54.614600
## 9                       2583.232844                      2773.171511
## 10                        16.576527                         5.042072
##    Tumor_only_timepoint_1-S_8_R0108 Tumor_only_timepoint_1-S_9_R0108
## 1                        273.597302                       161.953437
## 2                         65.456486                        53.728279
## 3                         18.428030                         6.908138
## 4                        155.662949                       139.898918
## 5                         12.920360                         0.000000
## 6                          7.539861                         6.864301
## 7                        109.368560                        91.134837
## 8                        118.646154                        30.616653
## 9                       2779.194878                      2806.570115
## 10                        10.848075                        11.522122
##    Tumor_only_timepoint_1-S_34_R0108 Tumor_only_timepoint_1-S_35_R0108
## 1                         297.822212                        351.120560
## 2                          64.761167                         60.833866
## 3                           6.168086                          4.108498
## 4                          75.784308                        108.644603
## 5                           6.126519                          5.441081
## 6                           1.225789                          4.082427
## 7                          71.821417                         79.658971
## 8                          30.861415                         41.956265
## 9                        3064.580619                       3345.985933
## 10                          9.876273                          8.223104
##    Tumor_only_timepoint_1-S_36_R0108 Tumor_only_timepoint_1-S_37_R0108
## 1                         420.097841                        290.666139
## 2                          47.844025                         43.135620
## 3                           8.124189                          2.359208
## 4                          78.863488                        126.747439
## 5                          16.138880                          0.000000
## 6                           6.727196                          0.000000
## 7                         106.799762                        101.535313
## 8                          41.444818                         27.141134
## 9                        2950.083088                       3153.145857
## 10                         18.970531                          2.360961
##    Tumor_only_timepoint_1-S_38_R0108 Tumor_only_timepoint_1-S_39_R0108
## 1                         402.564866                        406.966531
## 2                          57.841038                         84.857099
## 3                           9.171110                         19.052192
## 4                         178.052411                        166.780680
## 5                           7.085016                          6.678988
## 6                           2.025092                          4.454422
## 7                          97.821519                         96.012927
## 8                          82.837569                         97.044997
## 9                        3091.523270                       3465.008042
## 10                         12.237236                         12.337052
##    Tumor_only_timepoint_1-S_40_R0108 Tumor_only_timepoint_1-S_41_R0108
## 1                         316.059288                         457.71021
## 2                          55.206801                          64.08933
## 3                          16.929106                          17.31344
## 4                         139.644242                         135.22537
## 5                           6.005364                           9.55376
## 6                           4.806194                           6.69028
## 7                          52.263737                          88.42372
## 8                          28.980145                         101.49043
## 9                        2911.020108                        3081.24480
## 10                         14.521448                          23.10175
##    Tumor_only_timepoint_1-S_42_R0108 Tumor_only_timepoint_1-S_43_R0108
## 1                          480.48704                        199.771467
## 2                           54.28901                         81.112141
## 3                           32.73265                         20.566199
## 4                          185.68004                        215.221101
## 5                           19.73947                          9.676233
## 6                            4.64642                          3.226688
## 7                          103.96059                         87.287023
## 8                          104.66037                         34.836180
## 9                         3006.12358                       3010.527465
## 10                          14.03871                         11.915597
##    Tumor_only_timepoint_1-S_44_R0108 Tumor_only_timepoint_1-S_45_R0108
## 1                         229.257848                        203.016839
## 2                         101.769822                         60.487194
## 3                          19.150569                         18.594181
## 4                         203.126981                        201.245183
## 5                           0.000000                          0.000000
## 6                           1.189315                          8.211639
## 7                          95.951230                         66.074768
## 8                          44.097532                         36.432202
## 9                        2601.755822                       3026.664063
## 10                          5.989001                          8.270223
##    Tumor_only_timepoint_1-S_70_R0108 Tumor_only_timepoint_1-S_71_R0108
## 1                         440.139715                        323.892923
## 2                          48.336005                         57.581463
## 3                          11.796622                          9.871678
## 4                         110.169108                         89.218043
## 5                          15.232262                          8.715691
## 6                           1.172176                          1.089893
## 7                         100.600029                         66.246309
## 8                         158.691062                         73.283414
## 9                        2943.392418                       3247.204251
## 10                          7.083234                          7.683679
##    Tumor_only_timepoint_1-S_72_R0108 Tumor_only_timepoint_1-S_73_R0108
## 1                         265.136089                        459.069572
## 2                          28.606686                         43.864671
## 3                           6.839694                         19.749736
## 4                         111.856885                        106.104620
## 5                           0.000000                         22.559139
## 6                           3.883595                          4.906103
## 7                          94.042735                         90.517228
## 8                          37.192036                        104.972199
## 9                        2796.872346                       2890.697668
## 10                          4.889127                         13.835091
##    Tumor_only_timepoint_1-S_74_R0108 Tumor_only_timepoint_1-S_75_R0108
## 1                         354.799205                        318.324971
## 2                          57.523615                         59.434830
## 3                           5.907787                          4.709775
## 4                          85.429245                         83.949419
## 5                           3.520785                          7.017053
## 6                           1.174060                          2.339944
## 7                          84.495940                         81.791847
## 8                          14.314338                         39.161395
## 9                        2474.143255                       2743.189753
## 10                         15.371664                         10.604869
##    Tumor_only_timepoint_1-S_76_R0108 Tumor_only_timepoint_1-S_77_R0108
## 1                         174.547255                        289.569693
## 2                          70.389334                         88.539703
## 3                           6.577640                         13.916724
## 4                         100.399813                        165.883122
## 5                           0.000000                         15.797644
## 6                           5.228720                          5.926462
## 7                          53.714433                         82.068469
## 8                          23.102968                         86.155960
## 9                        2747.646997                       3257.905502
## 10                          3.949517                         11.937487
##    Tumor_only_timepoint_1-S_78_R0108 Tumor_only_timepoint_1-S_79_R0108
## 1                         226.481125                        217.422651
## 2                          52.089351                         71.157408
## 3                           6.879499                         12.983855
## 4                         114.182699                        178.061376
## 5                          11.388562                          8.290515
## 6                           1.139307                          9.215331
## 7                          74.665224                         70.239239
## 8                          43.979256                         44.271868
## 9                        2742.242177                       3008.792871
## 10                         11.474353                          9.281076
##    Tumor_only_timepoint_1-S_80_R0108 Tumor_only_timepoint_1-S_81_R0108
## 1                         237.785720                        199.559990
## 2                          65.070061                         73.968560
## 3                           9.310747                         13.759217
## 4                         193.700233                        226.385809
## 5                           6.473601                          9.461419
## 6                           3.700666                          7.361795
## 7                          81.003262                         83.840786
## 8                          48.784798                         33.382367
## 9                        2553.959914                       2921.514493
## 10                         11.181201                         10.591880
##    Tumor_only_timepoint_1-S_1_R0114_L5311 Tumor_only_timepoint_1-S_2_R0114_L5311
## 1                             505.9655486                            534.8699989
## 2                              60.9952416                             83.8498816
## 3                               6.0972113                              9.4535404
## 4                             138.0414336                            148.5011370
## 5                               1.1011131                              1.1177423
## 6                               0.5507746                              0.6709679
## 7                              88.9047202                             39.0957467
## 8                               1.5496474                              4.3073388
## 9                            4343.4360644                           4697.6721201
## 10                              6.1017433                              1.3515096
##    Tumor_only_timepoint_1-S_3_R0114_L5311 Tumor_only_timepoint_1-S_4_R0114_L5311
## 1                              398.278202                            349.7592985
## 2                               81.552390                             20.4637753
## 3                                0.000000                             12.7626253
## 4                              125.031832                            117.7537527
## 5                                4.732983                              2.2370502
## 6                                0.000000                              0.0000000
## 7                               74.366562                             76.4479256
## 8                                3.227663                              0.6996231
## 9                             3541.985255                           2258.0354224
## 10                               3.577009                              6.7617062
##    Tumor_only_timepoint_1-S_5_R0114_L5311 Tumor_only_timepoint_1-S_6_R0114_L5311
## 1                             400.4039886                            378.5871716
## 2                              36.2312671                             32.1178943
## 3                               3.6620692                              0.7574725
## 4                             150.1706045                            186.3597761
## 5                               0.7274781                              0.0000000
## 6                               2.1832985                              0.0000000
## 7                              83.9293788                             81.2914481
## 8                               5.2486655                              4.9412632
## 9                            3216.1610799                           2904.2991488
## 10                             16.8580396                              5.3062488
##    Tumor_only_timepoint_1-S_7_R0114_L5311 Tumor_only_timepoint_1-S_8_R0114_L5311
## 1                              405.189817                             345.041272
## 2                              136.987058                              66.942635
## 3                               27.320185                              11.367242
## 4                              270.690680                             216.884210
## 5                                2.713607                               1.254515
## 6                                0.000000                               0.000000
## 7                               56.265323                              64.204041
## 8                                0.000000                               1.177025
## 9                             4556.187967                            2642.948039
## 10                              17.315645                               6.319829
##    Tumor_only_timepoint_1-S_9_R0114_L5311 Tumor_only_timepoint_1-S_10_R0114_L5311
## 1                              321.561946                             303.5678649
## 2                               64.563574                              57.1540398
## 3                                4.263492                              10.2217886
## 4                              189.936624                             226.7748238
## 5                                4.940553                               0.0000000
## 6                                0.000000                               1.5626037
## 7                               65.269987                              83.8668240
## 8                                5.284401                               0.7327513
## 9                             3465.607292                            3154.0607381
## 10                               4.977771                               5.5081311
##    Tumor_only_timepoint_1-S_11_R0114_L5311 Tumor_only_timepoint_1-S_12_R0114_L5311
## 1                              177.4169334                              206.715578
## 2                               85.4001696                               88.472046
## 3                               26.9457231                               16.380283
## 4                              240.2594438                              283.137834
## 5                                0.0000000                                2.440484
## 6                                0.8924911                                4.882902
## 7                               62.2162061                               60.448264
## 8                                3.3481249                                5.148823
## 9                             2508.3364761                             2868.686873
## 10                              25.1680349                               13.113966
##    Tumor_plus_NK_timepoint_2-S_16_R0108 Tumor_plus_NK_timepoint_2-S_17_R0108
## 1                            1747.63424                           2043.64240
## 2                             181.27471                            176.89550
## 3                              84.16812                            115.67506
## 4                             379.78296                            376.28100
## 5                              28.98165                             32.40643
## 6                              42.37457                             63.85612
## 7                             122.06926                            141.00851
## 8                             106.58488                            116.17965
## 9                            6905.47407                           6449.94761
## 10                             24.70767                             37.59760
##    Tumor_plus_NK_timepoint_2-S_18_R0108 Tumor_plus_NK_timepoint_2-S_19_R0108
## 1                            1961.66038                           2169.31465
## 2                             208.42627                            252.79548
## 3                             131.79911                            126.70198
## 4                             393.85502                            640.51907
## 5                              38.00639                             67.76438
## 6                              63.36908                             58.10676
## 7                             166.78004                            210.28623
## 8                             203.35784                            306.30693
## 9                            6295.43975                           5827.26955
## 10                             44.67482                             32.51184
##    Tumor_plus_NK_timepoint_2-S_20_R0108 Tumor_plus_NK_timepoint_2-S_21_R0108
## 1                            1663.59370                           1719.85123
## 2                             297.94389                            307.26016
## 3                             148.16929                            124.86695
## 4                             566.98286                            551.29567
## 5                              40.31177                             56.01150
## 6                              59.30549                             44.02647
## 7                             197.19845                            203.08716
## 8                             240.53894                            271.33505
## 9                            5807.39212                           5961.36343
## 10                             27.47515                             40.30960
##    Tumor_plus_NK_timepoint_2-S_58_R0108 Tumor_plus_NK_timepoint_2-S_59_R0108
## 1                            6488.80633                           6112.69599
## 2                             503.96062                            519.00145
## 3                             197.54182                            172.40962
## 4                             600.59927                            521.49861
## 5                             549.18734                            461.83058
## 6                              82.96721                             61.25829
## 7                            1193.43729                           1284.76246
## 8                            4667.39493                           4185.81629
## 9                            6607.43373                           6610.74395
## 10                            335.25549                            308.47662
##    Tumor_plus_NK_timepoint_2-S_60_R0108 Tumor_plus_NK_timepoint_2-S_61_R0108
## 1                            6934.22563                           2570.28564
## 2                             517.09769                            326.79839
## 3                             141.77686                            120.59119
## 4                             608.79137                            319.08203
## 5                             528.39189                             77.95110
## 6                              57.34824                             23.77499
## 7                            1221.99259                            292.64331
## 8                            4105.83752                            573.06006
## 9                            6155.02388                           5435.19475
## 10                            297.57603                             69.91826
##    Tumor_plus_NK_timepoint_2-S_62_R0108 Tumor_plus_NK_timepoint_2-S_63_R0108
## 1                            3412.69962                           2379.91330
## 2                             331.81238                            309.76795
## 3                             128.80487                            114.34307
## 4                             353.07839                            340.29375
## 5                             176.34539                            130.09215
## 6                              17.29561                             17.55907
## 7                             384.82494                            274.10965
## 8                            1045.62747                            621.84628
## 9                            5600.59252                           5799.93438
## 10                            130.06187                            100.90474
##    Tumor_plus_NK_timepoint_2-S_64_R0108 Tumor_plus_NK_timepoint_2-S_65_R0108
## 1                             4339.3412                            4841.7061
## 2                              301.9529                             342.1000
## 3                              213.3512                             193.8010
## 4                              443.5946                             409.1050
## 5                              146.5486                             243.3223
## 6                              147.5576                             123.3321
## 7                              606.5484                             629.9075
## 8                             2337.2736                            2906.8185
## 9                             6228.9387                            5952.1540
## 10                             212.5692                             223.3637
##    Tumor_plus_NK_timepoint_2-S_66_R0108 Tumor_plus_NK_timepoint_2-S_67_R0108
## 1                             4053.6859                            5411.1298
## 2                              318.4661                             499.6300
## 3                              194.3493                             286.8203
## 4                              331.4864                             617.1513
## 5                              179.4221                             349.3431
## 6                              104.8067                             156.6732
## 7                              612.9029                             971.6622
## 8                             2402.8629                            4499.3759
## 9                             5620.9965                            6559.9749
## 10                             166.1504                             379.5810
##    Tumor_plus_NK_timepoint_2-S_68_R0108 Tumor_plus_NK_timepoint_2-S_69_R0108
## 1                             5068.2301                            5785.6571
## 2                              571.7537                             618.9396
## 3                              212.2716                             185.3203
## 4                              647.7775                             565.9427
## 5                              290.4707                             298.7725
## 6                              121.8675                             123.6037
## 7                              715.3963                             824.1747
## 8                             3518.5242                            3694.0642
## 9                             5686.0770                            5153.7038
## 10                             276.1582                             307.4031
##    Tumor_plus_NK_timepoint_2-S_94_R0108 Tumor_plus_NK_timepoint_2-S_95_R0108
## 1                            2853.68834                           3907.22481
## 2                             315.31412                            408.27858
## 3                              98.55720                            102.00342
## 4                             376.87513                            444.90837
## 5                             136.49872                            189.47599
## 6                              12.41389                             48.88943
## 7                             369.86090                            525.84934
## 8                             582.78057                           1490.16889
## 9                            6266.28309                           5556.06367
## 10                             75.01471                            102.07924
##    Tumor_plus_NK_timepoint_2-S_96_R0108 Tumor_plus_NK_timepoint_2-S_97_R0108
## 1                           3054.755772                           1612.10441
## 2                            343.314502                            268.09793
## 3                             48.835770                             79.95366
## 4                            407.082559                            210.56451
## 5                            127.347277                             48.09632
## 6                              7.643862                              0.00000
## 7                            368.781959                            129.49845
## 8                            556.921078                            155.88130
## 9                           5219.548551                           4777.95969
## 10                            80.833154                             24.79279
##    Tumor_plus_NK_timepoint_2-S_98_R0108 Tumor_plus_NK_timepoint_2-S_99_R0108
## 1                           3161.130849                          3365.620217
## 2                            335.293619                           353.560196
## 3                             94.332083                            91.340829
## 4                            258.349255                           233.479909
## 5                            151.191882                           143.746550
## 6                              9.586379                             4.714868
## 7                            397.607171                           488.790691
## 8                            964.039135                          1354.323735
## 9                           4752.982702                          5356.586333
## 10                           101.911465                           115.151248
##    Tumor_plus_NK_timepoint_2-S_100_R0108 Tumor_plus_NK_timepoint_2-S_101_R0108
## 1                             2248.20859                             3693.7522
## 2                              285.03959                              343.1971
## 3                              126.88052                              119.1217
## 4                              413.17662                              357.6320
## 5                               63.01274                              143.9118
## 6                               48.40394                               50.0309
## 7                              229.62176                              406.1431
## 8                              317.53985                             1435.0022
## 9                             5123.10203                             5736.0080
## 10                              41.94704                               99.5467
##    Tumor_plus_NK_timepoint_2-S_102_R0108 Tumor_plus_NK_timepoint_2-S_103_R0108
## 1                             2497.46950                            3970.89731
## 2                              257.96566                             393.79609
## 3                               99.21820                             230.91655
## 4                              419.01959                             561.65120
## 5                               58.51867                             146.73088
## 6                               31.59401                              83.58817
## 7                              224.34658                             568.56385
## 8                              377.55128                            1663.47315
## 9                             4886.22860                            5833.79498
## 10                              59.89536                             144.75629
##    Tumor_plus_NK_timepoint_2-S_104_R0108 Tumor_plus_NK_timepoint_2-S_25_R0114_L5311
## 1                             4100.50705                                 2451.84810
## 2                              442.56640                                  275.66064
## 3                              141.80837                                   24.51542
## 4                              678.64953                                  438.98600
## 5                              212.31066                                   53.73839
## 6                               67.29958                                    5.03997
## 7                              520.66380                                  152.25959
## 8                             1819.02296                                  112.74574
## 9                             6454.32469                                 6012.40889
## 10                             140.85471                                   48.22130
##    Tumor_plus_NK_timepoint_2-S_26_R0114_L5311 Tumor_plus_NK_timepoint_2-S_27_R0114_L5311
## 1                                 1413.182567                                2255.761783
## 2                                  196.748054                                 230.346953
## 3                                   13.427171                                  22.182839
## 4                                  312.802010                                 398.735564
## 5                                   42.510684                                  23.607159
## 6                                    7.504856                                   8.796351
## 7                                  169.835020                                 135.128724
## 8                                  130.822084                                  87.631803
## 9                                 3255.278358                                3864.183482
## 10                                  42.830920                                  12.685330
##    Tumor_plus_NK_timepoint_2-S_28_R0114_L5311 Tumor_plus_NK_timepoint_2-S_29_R0114_L5311
## 1                                1512.3967890                                 3662.44856
## 2                                 139.5622890                                  286.44692
## 3                                  57.1499529                                  142.65577
## 4                                 240.5679225                                  427.38939
## 5                                  31.4507777                                  128.38609
## 6                                   0.7673959                                   10.18098
## 7                                 119.9576377                                  369.55434
## 8                                  25.1898319                                  906.82660
## 9                                3509.5858480                                 4351.23008
## 10                                 22.4132504                                  124.62080
##    Tumor_plus_NK_timepoint_2-S_30_R0114_L5311 Tumor_plus_NK_timepoint_2-S_31_R0114_L5311
## 1                                  2026.09662                                1379.749209
## 2                                   151.23444                                 166.924585
## 3                                    65.04070                                  64.058822
## 4                                   311.23758                                 310.013115
## 5                                    44.45756                                  29.130493
## 6                                     0.00000                                   9.202746
## 7                                   216.07682                                 122.625541
## 8                                   138.24193                                  50.223875
## 9                                  3933.19026                                4127.948022
## 10                                   42.69281                                  32.439402
##    Tumor_plus_NK_timepoint_2-S_32_R0114_L5311 Tumor_plus_NK_timepoint_2-S_33_R0114_L5311
## 1                                  1007.45324                                 1090.70706
## 2                                   121.56246                                  137.02984
## 3                                    22.86767                                   34.30605
## 4                                   299.26832                                  256.26347
## 5                                    12.45582                                   17.03743
## 6                                    11.72777                                   10.37472
## 7                                    85.44529                                   88.40889
## 8                                    22.68844                                   18.53216
## 9                                  3051.54335                                 3363.10199
## 10                                   34.69610                                   26.12183
##    Tumor_plus_NK_timepoint_2-S_34_R0114_L5311 Tumor_plus_NK_timepoint_2-S_35_R0114_L5311
## 1                                  2545.25905                                 1889.68993
## 2                                   475.60652                                  375.51234
## 3                                   138.20748                                   91.00698
## 4                                   599.21008                                  413.73002
## 5                                    20.33720                                   16.11670
## 6                                    79.92778                                   25.93714
## 7                                   170.30992                                  117.15614
## 8                                    77.83998                                   52.85539
## 9                                  6938.32427                                 5117.73916
## 10                                   30.00380                                   25.41617
##    Tumor_plus_NK_timepoint_2-S_36_R0114_L5311
## 1                                 1899.567509
## 2                                  397.353609
## 3                                   99.386914
## 4                                  458.410596
## 5                                   27.913123
## 6                                    9.535085
## 7                                   90.224031
## 8                                   45.286888
## 9                                 6247.357109
## 10                                  28.809331
##  [ reached 'max' / getOption("max.print") -- omitted 480 rows ]
## 
## $de_details
##                                                               test
## 1 condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1
##                                         design signif_genes signif_genes_UP signif_genes_DOWN
## 1 ~experiment + cell_line_label + condition_tp          490             330               160
##                                                       cutoffs
## 1 (!is.na(padj) & (padj < 0.05)) & abs(log2FoldChange) > 0.58
## 
## $results_all
##            ensembl_id mgi_symbol                                         mgi_description entrezgene
## 1  ENSMUSG00000075602       Ly6a                   lymphocyte antigen 6 complex, locus A     110454
## 2  ENSMUSG00000060675     Plaat3                   phospholipase A and acyltransferase 3     225845
## 3  ENSMUSG00000033880   Lgals3bp lectin, galactoside-binding, soluble, 3 binding protein      19039
## 4  ENSMUSG00000064215      Ifi27                  interferon, alpha-inducible protein 27      52668
## 5  ENSMUSG00000104713       Gbp6                             guanylate binding protein 6     100702
## 6  ENSMUSG00000033355       Rtp4                          receptor transporter protein 4      67775
## 7  ENSMUSG00000026104      Stat1      signal transducer and activator of transcription 1      20846
## 8  ENSMUSG00000054072      Iigp1                           interferon inducible GTPase 1      60440
## 9  ENSMUSG00000061232      H2-K1                      histocompatibility 2, K1, K region      14972
## 10 ENSMUSG00000073555     Gm4951                                     predicted gene 4951     240327
##      baseMean MeanExpr_Tumor_only_timepoint_1 MeanExpr_Tumor_plus_NK_timepoint_2 log2FoldChange
## 1  2456.40621                      317.387691                         3080.57543      3.2258807
## 2   242.32525                       61.223411                          324.05583      2.3592201
## 3    96.84177                       11.627393                          119.52526      3.3142561
## 4   364.12250                      153.624891                          426.71025      1.4770706
## 5   128.50281                        6.047601                          135.68641      4.1428494
## 6    39.88500                        3.053751                           46.64006      3.7056800
## 7   342.79939                       79.385613                          393.15858      1.9873128
## 8  1112.75679                       42.307708                         1175.32669      4.3614229
## 9  4893.62031                     3035.265875                         5424.20556      0.8243463
## 10   95.71447                       10.450136                          109.08363      2.9734537
##         lfcSE FoldChange        pvalue          padj   gene_biotype chromosome_name start_position
## 1  0.10740032   9.355928 9.066006e-206 1.331071e-201 protein_coding              15       74994877
## 2  0.09136706   5.130929 2.798105e-150 2.054089e-146 protein_coding              19        7557459
## 3  0.15494094   9.946963 1.049868e-103 5.138053e-100 protein_coding              11      118392751
## 4  0.06968169   2.783829 6.176427e-102  2.267058e-98 protein_coding              12      103434211
## 5  0.23046748  17.665338  3.423172e-77  1.005180e-73 protein_coding               5      105270702
## 6  0.21981886  13.047305  6.073340e-65  1.486146e-61 protein_coding              16       23520291
## 7  0.12601002   3.964978  5.014857e-61  1.051830e-57 protein_coding               1       52119440
## 8  0.29663413  20.555077  5.585136e-58  1.025012e-54 protein_coding              18       60376027
## 9  0.05356110   1.770732  6.395728e-55  1.043356e-51 protein_coding              17       33996017
## 10 0.20013176   7.854142  1.474542e-54  2.164923e-51 protein_coding              18       60212080
##    end_position strand Tumor_only_timepoint_1-S_4_R0108 Tumor_only_timepoint_1-S_5_R0108
## 1      74998031     -1                      226.3811564                       341.817725
## 2       7588545      1                       28.0879052                        43.238856
## 3     118402092     -1                        1.7388754                        14.990335
## 4     103440239      1                       98.6588702                       122.576546
## 5     105293698     -1                        1.7271571                        14.889315
## 6      23614222      1                        0.8639205                         5.319718
## 7      52161865      1                       74.6331721                        79.944903
## 8      60392634      1                       29.1684966                        69.538617
## 9      34000347     -1                     2565.3386318                      2781.017989
## 10     60247820      1                        7.8307557                        15.001477
##    Tumor_only_timepoint_1-S_6_R0108 Tumor_only_timepoint_1-S_7_R0108
## 1                        288.630468                       148.275585
## 2                         39.019215                        34.333732
## 3                          6.625686                        13.855399
## 4                        100.911393                       160.638370
## 5                          3.290518                         5.004374
## 6                          0.000000                         2.503178
## 7                         61.756281                        70.215071
## 8                         59.599173                        54.614600
## 9                       2583.232844                      2773.171511
## 10                        16.576527                         5.042072
##    Tumor_only_timepoint_1-S_8_R0108 Tumor_only_timepoint_1-S_9_R0108
## 1                        273.597302                       161.953437
## 2                         65.456486                        53.728279
## 3                         18.428030                         6.908138
## 4                        155.662949                       139.898918
## 5                         12.920360                         0.000000
## 6                          7.539861                         6.864301
## 7                        109.368560                        91.134837
## 8                        118.646154                        30.616653
## 9                       2779.194878                      2806.570115
## 10                        10.848075                        11.522122
##    Tumor_only_timepoint_1-S_34_R0108 Tumor_only_timepoint_1-S_35_R0108
## 1                         297.822212                        351.120560
## 2                          64.761167                         60.833866
## 3                           6.168086                          4.108498
## 4                          75.784308                        108.644603
## 5                           6.126519                          5.441081
## 6                           1.225789                          4.082427
## 7                          71.821417                         79.658971
## 8                          30.861415                         41.956265
## 9                        3064.580619                       3345.985933
## 10                          9.876273                          8.223104
##    Tumor_only_timepoint_1-S_36_R0108 Tumor_only_timepoint_1-S_37_R0108
## 1                         420.097841                        290.666139
## 2                          47.844025                         43.135620
## 3                           8.124189                          2.359208
## 4                          78.863488                        126.747439
## 5                          16.138880                          0.000000
## 6                           6.727196                          0.000000
## 7                         106.799762                        101.535313
## 8                          41.444818                         27.141134
## 9                        2950.083088                       3153.145857
## 10                         18.970531                          2.360961
##    Tumor_only_timepoint_1-S_38_R0108 Tumor_only_timepoint_1-S_39_R0108
## 1                         402.564866                        406.966531
## 2                          57.841038                         84.857099
## 3                           9.171110                         19.052192
## 4                         178.052411                        166.780680
## 5                           7.085016                          6.678988
## 6                           2.025092                          4.454422
## 7                          97.821519                         96.012927
## 8                          82.837569                         97.044997
## 9                        3091.523270                       3465.008042
## 10                         12.237236                         12.337052
##    Tumor_only_timepoint_1-S_40_R0108 Tumor_only_timepoint_1-S_41_R0108
## 1                         316.059288                         457.71021
## 2                          55.206801                          64.08933
## 3                          16.929106                          17.31344
## 4                         139.644242                         135.22537
## 5                           6.005364                           9.55376
## 6                           4.806194                           6.69028
## 7                          52.263737                          88.42372
## 8                          28.980145                         101.49043
## 9                        2911.020108                        3081.24480
## 10                         14.521448                          23.10175
##    Tumor_only_timepoint_1-S_42_R0108 Tumor_only_timepoint_1-S_43_R0108
## 1                          480.48704                        199.771467
## 2                           54.28901                         81.112141
## 3                           32.73265                         20.566199
## 4                          185.68004                        215.221101
## 5                           19.73947                          9.676233
## 6                            4.64642                          3.226688
## 7                          103.96059                         87.287023
## 8                          104.66037                         34.836180
## 9                         3006.12358                       3010.527465
## 10                          14.03871                         11.915597
##    Tumor_only_timepoint_1-S_44_R0108 Tumor_only_timepoint_1-S_45_R0108
## 1                         229.257848                        203.016839
## 2                         101.769822                         60.487194
## 3                          19.150569                         18.594181
## 4                         203.126981                        201.245183
## 5                           0.000000                          0.000000
## 6                           1.189315                          8.211639
## 7                          95.951230                         66.074768
## 8                          44.097532                         36.432202
## 9                        2601.755822                       3026.664063
## 10                          5.989001                          8.270223
##    Tumor_only_timepoint_1-S_70_R0108 Tumor_only_timepoint_1-S_71_R0108
## 1                         440.139715                        323.892923
## 2                          48.336005                         57.581463
## 3                          11.796622                          9.871678
## 4                         110.169108                         89.218043
## 5                          15.232262                          8.715691
## 6                           1.172176                          1.089893
## 7                         100.600029                         66.246309
## 8                         158.691062                         73.283414
## 9                        2943.392418                       3247.204251
## 10                          7.083234                          7.683679
##    Tumor_only_timepoint_1-S_72_R0108 Tumor_only_timepoint_1-S_73_R0108
## 1                         265.136089                        459.069572
## 2                          28.606686                         43.864671
## 3                           6.839694                         19.749736
## 4                         111.856885                        106.104620
## 5                           0.000000                         22.559139
## 6                           3.883595                          4.906103
## 7                          94.042735                         90.517228
## 8                          37.192036                        104.972199
## 9                        2796.872346                       2890.697668
## 10                          4.889127                         13.835091
##    Tumor_only_timepoint_1-S_74_R0108 Tumor_only_timepoint_1-S_75_R0108
## 1                         354.799205                        318.324971
## 2                          57.523615                         59.434830
## 3                           5.907787                          4.709775
## 4                          85.429245                         83.949419
## 5                           3.520785                          7.017053
## 6                           1.174060                          2.339944
## 7                          84.495940                         81.791847
## 8                          14.314338                         39.161395
## 9                        2474.143255                       2743.189753
## 10                         15.371664                         10.604869
##    Tumor_only_timepoint_1-S_76_R0108 Tumor_only_timepoint_1-S_77_R0108
## 1                         174.547255                        289.569693
## 2                          70.389334                         88.539703
## 3                           6.577640                         13.916724
## 4                         100.399813                        165.883122
## 5                           0.000000                         15.797644
## 6                           5.228720                          5.926462
## 7                          53.714433                         82.068469
## 8                          23.102968                         86.155960
## 9                        2747.646997                       3257.905502
## 10                          3.949517                         11.937487
##    Tumor_only_timepoint_1-S_78_R0108 Tumor_only_timepoint_1-S_79_R0108
## 1                         226.481125                        217.422651
## 2                          52.089351                         71.157408
## 3                           6.879499                         12.983855
## 4                         114.182699                        178.061376
## 5                          11.388562                          8.290515
## 6                           1.139307                          9.215331
## 7                          74.665224                         70.239239
## 8                          43.979256                         44.271868
## 9                        2742.242177                       3008.792871
## 10                         11.474353                          9.281076
##    Tumor_only_timepoint_1-S_80_R0108 Tumor_only_timepoint_1-S_81_R0108
## 1                         237.785720                        199.559990
## 2                          65.070061                         73.968560
## 3                           9.310747                         13.759217
## 4                         193.700233                        226.385809
## 5                           6.473601                          9.461419
## 6                           3.700666                          7.361795
## 7                          81.003262                         83.840786
## 8                          48.784798                         33.382367
## 9                        2553.959914                       2921.514493
## 10                         11.181201                         10.591880
##    Tumor_only_timepoint_1-S_1_R0114_L5311 Tumor_only_timepoint_1-S_2_R0114_L5311
## 1                             505.9655486                            534.8699989
## 2                              60.9952416                             83.8498816
## 3                               6.0972113                              9.4535404
## 4                             138.0414336                            148.5011370
## 5                               1.1011131                              1.1177423
## 6                               0.5507746                              0.6709679
## 7                              88.9047202                             39.0957467
## 8                               1.5496474                              4.3073388
## 9                            4343.4360644                           4697.6721201
## 10                              6.1017433                              1.3515096
##    Tumor_only_timepoint_1-S_3_R0114_L5311 Tumor_only_timepoint_1-S_4_R0114_L5311
## 1                              398.278202                            349.7592985
## 2                               81.552390                             20.4637753
## 3                                0.000000                             12.7626253
## 4                              125.031832                            117.7537527
## 5                                4.732983                              2.2370502
## 6                                0.000000                              0.0000000
## 7                               74.366562                             76.4479256
## 8                                3.227663                              0.6996231
## 9                             3541.985255                           2258.0354224
## 10                               3.577009                              6.7617062
##    Tumor_only_timepoint_1-S_5_R0114_L5311 Tumor_only_timepoint_1-S_6_R0114_L5311
## 1                             400.4039886                            378.5871716
## 2                              36.2312671                             32.1178943
## 3                               3.6620692                              0.7574725
## 4                             150.1706045                            186.3597761
## 5                               0.7274781                              0.0000000
## 6                               2.1832985                              0.0000000
## 7                              83.9293788                             81.2914481
## 8                               5.2486655                              4.9412632
## 9                            3216.1610799                           2904.2991488
## 10                             16.8580396                              5.3062488
##    Tumor_only_timepoint_1-S_7_R0114_L5311 Tumor_only_timepoint_1-S_8_R0114_L5311
## 1                              405.189817                             345.041272
## 2                              136.987058                              66.942635
## 3                               27.320185                              11.367242
## 4                              270.690680                             216.884210
## 5                                2.713607                               1.254515
## 6                                0.000000                               0.000000
## 7                               56.265323                              64.204041
## 8                                0.000000                               1.177025
## 9                             4556.187967                            2642.948039
## 10                              17.315645                               6.319829
##    Tumor_only_timepoint_1-S_9_R0114_L5311 Tumor_only_timepoint_1-S_10_R0114_L5311
## 1                              321.561946                             303.5678649
## 2                               64.563574                              57.1540398
## 3                                4.263492                              10.2217886
## 4                              189.936624                             226.7748238
## 5                                4.940553                               0.0000000
## 6                                0.000000                               1.5626037
## 7                               65.269987                              83.8668240
## 8                                5.284401                               0.7327513
## 9                             3465.607292                            3154.0607381
## 10                               4.977771                               5.5081311
##    Tumor_only_timepoint_1-S_11_R0114_L5311 Tumor_only_timepoint_1-S_12_R0114_L5311
## 1                              177.4169334                              206.715578
## 2                               85.4001696                               88.472046
## 3                               26.9457231                               16.380283
## 4                              240.2594438                              283.137834
## 5                                0.0000000                                2.440484
## 6                                0.8924911                                4.882902
## 7                               62.2162061                               60.448264
## 8                                3.3481249                                5.148823
## 9                             2508.3364761                             2868.686873
## 10                              25.1680349                               13.113966
##    Tumor_plus_NK_timepoint_2-S_16_R0108 Tumor_plus_NK_timepoint_2-S_17_R0108
## 1                            1747.63424                           2043.64240
## 2                             181.27471                            176.89550
## 3                              84.16812                            115.67506
## 4                             379.78296                            376.28100
## 5                              28.98165                             32.40643
## 6                              42.37457                             63.85612
## 7                             122.06926                            141.00851
## 8                             106.58488                            116.17965
## 9                            6905.47407                           6449.94761
## 10                             24.70767                             37.59760
##    Tumor_plus_NK_timepoint_2-S_18_R0108 Tumor_plus_NK_timepoint_2-S_19_R0108
## 1                            1961.66038                           2169.31465
## 2                             208.42627                            252.79548
## 3                             131.79911                            126.70198
## 4                             393.85502                            640.51907
## 5                              38.00639                             67.76438
## 6                              63.36908                             58.10676
## 7                             166.78004                            210.28623
## 8                             203.35784                            306.30693
## 9                            6295.43975                           5827.26955
## 10                             44.67482                             32.51184
##    Tumor_plus_NK_timepoint_2-S_20_R0108 Tumor_plus_NK_timepoint_2-S_21_R0108
## 1                            1663.59370                           1719.85123
## 2                             297.94389                            307.26016
## 3                             148.16929                            124.86695
## 4                             566.98286                            551.29567
## 5                              40.31177                             56.01150
## 6                              59.30549                             44.02647
## 7                             197.19845                            203.08716
## 8                             240.53894                            271.33505
## 9                            5807.39212                           5961.36343
## 10                             27.47515                             40.30960
##    Tumor_plus_NK_timepoint_2-S_58_R0108 Tumor_plus_NK_timepoint_2-S_59_R0108
## 1                            6488.80633                           6112.69599
## 2                             503.96062                            519.00145
## 3                             197.54182                            172.40962
## 4                             600.59927                            521.49861
## 5                             549.18734                            461.83058
## 6                              82.96721                             61.25829
## 7                            1193.43729                           1284.76246
## 8                            4667.39493                           4185.81629
## 9                            6607.43373                           6610.74395
## 10                            335.25549                            308.47662
##    Tumor_plus_NK_timepoint_2-S_60_R0108 Tumor_plus_NK_timepoint_2-S_61_R0108
## 1                            6934.22563                           2570.28564
## 2                             517.09769                            326.79839
## 3                             141.77686                            120.59119
## 4                             608.79137                            319.08203
## 5                             528.39189                             77.95110
## 6                              57.34824                             23.77499
## 7                            1221.99259                            292.64331
## 8                            4105.83752                            573.06006
## 9                            6155.02388                           5435.19475
## 10                            297.57603                             69.91826
##    Tumor_plus_NK_timepoint_2-S_62_R0108 Tumor_plus_NK_timepoint_2-S_63_R0108
## 1                            3412.69962                           2379.91330
## 2                             331.81238                            309.76795
## 3                             128.80487                            114.34307
## 4                             353.07839                            340.29375
## 5                             176.34539                            130.09215
## 6                              17.29561                             17.55907
## 7                             384.82494                            274.10965
## 8                            1045.62747                            621.84628
## 9                            5600.59252                           5799.93438
## 10                            130.06187                            100.90474
##    Tumor_plus_NK_timepoint_2-S_64_R0108 Tumor_plus_NK_timepoint_2-S_65_R0108
## 1                             4339.3412                            4841.7061
## 2                              301.9529                             342.1000
## 3                              213.3512                             193.8010
## 4                              443.5946                             409.1050
## 5                              146.5486                             243.3223
## 6                              147.5576                             123.3321
## 7                              606.5484                             629.9075
## 8                             2337.2736                            2906.8185
## 9                             6228.9387                            5952.1540
## 10                             212.5692                             223.3637
##    Tumor_plus_NK_timepoint_2-S_66_R0108 Tumor_plus_NK_timepoint_2-S_67_R0108
## 1                             4053.6859                            5411.1298
## 2                              318.4661                             499.6300
## 3                              194.3493                             286.8203
## 4                              331.4864                             617.1513
## 5                              179.4221                             349.3431
## 6                              104.8067                             156.6732
## 7                              612.9029                             971.6622
## 8                             2402.8629                            4499.3759
## 9                             5620.9965                            6559.9749
## 10                             166.1504                             379.5810
##    Tumor_plus_NK_timepoint_2-S_68_R0108 Tumor_plus_NK_timepoint_2-S_69_R0108
## 1                             5068.2301                            5785.6571
## 2                              571.7537                             618.9396
## 3                              212.2716                             185.3203
## 4                              647.7775                             565.9427
## 5                              290.4707                             298.7725
## 6                              121.8675                             123.6037
## 7                              715.3963                             824.1747
## 8                             3518.5242                            3694.0642
## 9                             5686.0770                            5153.7038
## 10                             276.1582                             307.4031
##    Tumor_plus_NK_timepoint_2-S_94_R0108 Tumor_plus_NK_timepoint_2-S_95_R0108
## 1                            2853.68834                           3907.22481
## 2                             315.31412                            408.27858
## 3                              98.55720                            102.00342
## 4                             376.87513                            444.90837
## 5                             136.49872                            189.47599
## 6                              12.41389                             48.88943
## 7                             369.86090                            525.84934
## 8                             582.78057                           1490.16889
## 9                            6266.28309                           5556.06367
## 10                             75.01471                            102.07924
##    Tumor_plus_NK_timepoint_2-S_96_R0108 Tumor_plus_NK_timepoint_2-S_97_R0108
## 1                           3054.755772                           1612.10441
## 2                            343.314502                            268.09793
## 3                             48.835770                             79.95366
## 4                            407.082559                            210.56451
## 5                            127.347277                             48.09632
## 6                              7.643862                              0.00000
## 7                            368.781959                            129.49845
## 8                            556.921078                            155.88130
## 9                           5219.548551                           4777.95969
## 10                            80.833154                             24.79279
##    Tumor_plus_NK_timepoint_2-S_98_R0108 Tumor_plus_NK_timepoint_2-S_99_R0108
## 1                           3161.130849                          3365.620217
## 2                            335.293619                           353.560196
## 3                             94.332083                            91.340829
## 4                            258.349255                           233.479909
## 5                            151.191882                           143.746550
## 6                              9.586379                             4.714868
## 7                            397.607171                           488.790691
## 8                            964.039135                          1354.323735
## 9                           4752.982702                          5356.586333
## 10                           101.911465                           115.151248
##    Tumor_plus_NK_timepoint_2-S_100_R0108 Tumor_plus_NK_timepoint_2-S_101_R0108
## 1                             2248.20859                             3693.7522
## 2                              285.03959                              343.1971
## 3                              126.88052                              119.1217
## 4                              413.17662                              357.6320
## 5                               63.01274                              143.9118
## 6                               48.40394                               50.0309
## 7                              229.62176                              406.1431
## 8                              317.53985                             1435.0022
## 9                             5123.10203                             5736.0080
## 10                              41.94704                               99.5467
##    Tumor_plus_NK_timepoint_2-S_102_R0108 Tumor_plus_NK_timepoint_2-S_103_R0108
## 1                             2497.46950                            3970.89731
## 2                              257.96566                             393.79609
## 3                               99.21820                             230.91655
## 4                              419.01959                             561.65120
## 5                               58.51867                             146.73088
## 6                               31.59401                              83.58817
## 7                              224.34658                             568.56385
## 8                              377.55128                            1663.47315
## 9                             4886.22860                            5833.79498
## 10                              59.89536                             144.75629
##    Tumor_plus_NK_timepoint_2-S_104_R0108 Tumor_plus_NK_timepoint_2-S_25_R0114_L5311
## 1                             4100.50705                                 2451.84810
## 2                              442.56640                                  275.66064
## 3                              141.80837                                   24.51542
## 4                              678.64953                                  438.98600
## 5                              212.31066                                   53.73839
## 6                               67.29958                                    5.03997
## 7                              520.66380                                  152.25959
## 8                             1819.02296                                  112.74574
## 9                             6454.32469                                 6012.40889
## 10                             140.85471                                   48.22130
##    Tumor_plus_NK_timepoint_2-S_26_R0114_L5311 Tumor_plus_NK_timepoint_2-S_27_R0114_L5311
## 1                                 1413.182567                                2255.761783
## 2                                  196.748054                                 230.346953
## 3                                   13.427171                                  22.182839
## 4                                  312.802010                                 398.735564
## 5                                   42.510684                                  23.607159
## 6                                    7.504856                                   8.796351
## 7                                  169.835020                                 135.128724
## 8                                  130.822084                                  87.631803
## 9                                 3255.278358                                3864.183482
## 10                                  42.830920                                  12.685330
##    Tumor_plus_NK_timepoint_2-S_28_R0114_L5311 Tumor_plus_NK_timepoint_2-S_29_R0114_L5311
## 1                                1512.3967890                                 3662.44856
## 2                                 139.5622890                                  286.44692
## 3                                  57.1499529                                  142.65577
## 4                                 240.5679225                                  427.38939
## 5                                  31.4507777                                  128.38609
## 6                                   0.7673959                                   10.18098
## 7                                 119.9576377                                  369.55434
## 8                                  25.1898319                                  906.82660
## 9                                3509.5858480                                 4351.23008
## 10                                 22.4132504                                  124.62080
##    Tumor_plus_NK_timepoint_2-S_30_R0114_L5311 Tumor_plus_NK_timepoint_2-S_31_R0114_L5311
## 1                                  2026.09662                                1379.749209
## 2                                   151.23444                                 166.924585
## 3                                    65.04070                                  64.058822
## 4                                   311.23758                                 310.013115
## 5                                    44.45756                                  29.130493
## 6                                     0.00000                                   9.202746
## 7                                   216.07682                                 122.625541
## 8                                   138.24193                                  50.223875
## 9                                  3933.19026                                4127.948022
## 10                                   42.69281                                  32.439402
##    Tumor_plus_NK_timepoint_2-S_32_R0114_L5311 Tumor_plus_NK_timepoint_2-S_33_R0114_L5311
## 1                                  1007.45324                                 1090.70706
## 2                                   121.56246                                  137.02984
## 3                                    22.86767                                   34.30605
## 4                                   299.26832                                  256.26347
## 5                                    12.45582                                   17.03743
## 6                                    11.72777                                   10.37472
## 7                                    85.44529                                   88.40889
## 8                                    22.68844                                   18.53216
## 9                                  3051.54335                                 3363.10199
## 10                                   34.69610                                   26.12183
##    Tumor_plus_NK_timepoint_2-S_34_R0114_L5311 Tumor_plus_NK_timepoint_2-S_35_R0114_L5311
## 1                                  2545.25905                                 1889.68993
## 2                                   475.60652                                  375.51234
## 3                                   138.20748                                   91.00698
## 4                                   599.21008                                  413.73002
## 5                                    20.33720                                   16.11670
## 6                                    79.92778                                   25.93714
## 7                                   170.30992                                  117.15614
## 8                                    77.83998                                   52.85539
## 9                                  6938.32427                                 5117.73916
## 10                                   30.00380                                   25.41617
##    Tumor_plus_NK_timepoint_2-S_36_R0114_L5311
## 1                                 1899.567509
## 2                                  397.353609
## 3                                   99.386914
## 4                                  458.410596
## 5                                   27.913123
## 6                                    9.535085
## 7                                   90.224031
## 8                                   45.286888
## 9                                 6247.357109
## 10                                  28.809331
##  [ reached 'max' / getOption("max.print") -- omitted 14987 rows ]
metadata_heatmap <- as.data.frame(colData(dds_subset))
transf_object <- vsd_subset_filt
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???

heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]

annotation_col <- metadata_heatmap %>%
  dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting, 
  dplyr::arrange(condition_tp, cell_line_label)

heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]

ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
  dplyr::select(ensembl_id, mgi_symbol)


# select top 20 and 10 gens from ab cd up and down regulated
AB_res_path <- "~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_AB/deg_TpNK_tp2_vs_Tonly_tp1_AB_results.xlsx"
CD_res_path <- "~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_CD/deg_TpNK_tp2_vs_Tonly_tp1_CD_results.xlsx"

AB_results <- openxlsx::read.xlsx(xlsxFile = AB_res_path, sheet = "results_signif")
CD_results <- openxlsx::read.xlsx(xlsxFile = CD_res_path, sheet = "results_signif")

AB_CD_intersect_genes <- intersect(AB_results$mgi_symbol, CD_results$mgi_symbol)

AB_res_subset <- AB_results[AB_results$mgi_symbol %in% AB_CD_intersect_genes,]
CD_res_subset <- CD_results[CD_results$mgi_symbol %in% AB_CD_intersect_genes,]

combined_AB_CD <- rbind.data.frame(AB_res_subset[,1:14], CD_res_subset[,1:14])

AB_CD_res_subset_UP <- combined_AB_CD[combined_AB_CD$log2FoldChange > 0,]
AB_CD_res_subset_DOWN <- combined_AB_CD[combined_AB_CD$log2FoldChange < 0,]

AB_CD_res_subset_UP <- AB_CD_res_subset_UP %>% arrange(padj)
AB_CD_res_subset_UP <- AB_CD_res_subset_UP[!duplicated(AB_CD_res_subset_UP$mgi_symbol),] %>% slice_head(n=20)

AB_CD_res_subset_DOWN <- AB_CD_res_subset_DOWN %>% arrange(padj)
AB_CD_res_subset_DOWN <- AB_CD_res_subset_DOWN[!duplicated(AB_CD_res_subset_DOWN$mgi_symbol),]%>% slice_head(n=10)

geneset <- c(AB_CD_res_subset_UP$mgi_symbol, AB_CD_res_subset_DOWN$mgi_symbol)


ensembl_gene_set <- ensembl2symbol_annot[which(ensembl2symbol_annot$mgi_symbol %in% geneset),]
ensembl_gene_set <- ensembl_gene_set[ match(geneset , ensembl_gene_set$mgi_symbol),]

indexes <- which(row.names(heatmap_counts_NObatch_deg_ord) %in% ensembl_gene_set$ensembl_id)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[indexes,]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[ match(ensembl_gene_set$ensembl_id , row.names(heatmap_counts_NObatch_deg_ord)),]


rownames(heatmap_counts_NObatch_deg_ord) <- ensembl_gene_set$mgi_symbol


######

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

ann_colors = list(
  experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
  condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
  cell_line_label = c( A = "#E9D8A6", B = "#D9BE6D", C = "#676F54", D = "#395B50")
)

heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                   main = "Heatmap of signif. DEG",
                   scale = "row",
                   annotation_col = annotation_col,
                   annotation_colors = ann_colors,
                   #annotation_row = row_annot_symbols,
                   show_colnames = FALSE,
                   cluster_cols = FALSE,
                   cluster_rows = FALSE,
                   show_rownames = TRUE,
                   color = color.scheme,
                   fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
                   ) # 

<img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03”try to plot in the big, integrated DATASET”-1.png” width=“672” />

ggsave(filename = paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_AB_CD_comparison/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.eps"), 
       plot=heatmap_corrected,
       width = 30, height = 14, units = "cm")

ggsave(filename = paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_AB_CD_comparison/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.png"), 
       plot=heatmap_corrected,
       width = 30, height = 14, units = "cm")

Results - EXP9.8 comparing WT vs KO, IFNG and PrfKO_NK timepoints

# Loading the required dependencies:
import::from(.from = variancePartition, fitExtractVarPartModel, sortCols, plotVarPart)
import::from(.from = doParallel, registerDoParallel)
import::from(.from = parallel, makeCluster, stopCluster)
# import utils scripts
import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/rnaSelectTopVarGenes.R"), "rnaSelectTopVarGenes", .character_only=TRUE)
import::from(.from = here::here("utils/edaFunctions.R"), "varPartitionEstimate", "generatePCA", "pcaExtractVariance", "pcaPlotVariance", "pcaCorrPCs", "pcaCorrPCsPlot", .character_only=TRUE)
import::from(.from = here::here("utils/generateResults.R"), "generateResults_upd", .character_only=TRUE) # 
import::from(.from = here::here("utils/plotDegResults.R"), "plotVolcano","plotVolcano_repel", .character_only=TRUE)
import::from(.from = here::here("utils/generateEnsemblAnnotation.R"), "generateEnsemblAnnotation", .character_only=TRUE) # used for filtering
#project setup
# loading config file ----
config_file <- file.path( "nk_tum_immunoediting_config.yaml")  # params$config_file; params not found? ; change to project_config.yaml
config <- yaml::read_yaml(config_file)
#config <- yaml::read_yaml(params$config_file)

experiment_name="nk_tum_immunoedit_complete_ab2" # this can be a subset analysis - e.g. just batch1,...
experiment_design="1"  # used only to construct initial dds object
abs_filt_samples=3

# parameters for annotation
biomart_host = "http://nov2020.archive.ensembl.org"
biomart_Ens_version = "Ensembl Genes 102"
biomart_dataset="mmusculus_gene_ensembl"

# directories
output_dir = paste0("./results_dir", "/",experiment_name,"/")
dir.create(output_dir)
# importing only key functions that are actually used - not to polute namespace!
import::from(.from = readr, read_csv, cols)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup)  # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)
import::from(.from = GenomicFeatures, makeTxDbFromGFF)
import::from(.from = AnnotationDbi, annot_db_keys = keys, annot_db_select = select)
import::from(.from = tximport, tximport)
import::from(.from = DESeq2, .all=TRUE)

#table(metadata$condition_tp, metadata$experiment)
# Create a list of comparisons:
# Full list
# list_of_comparisons <- list(c("Tumor_plus_WT_NK_timepoint_2", "Tumor_only_timepoint_2"),
#                             c("Tumor_plus_IFNg_timepoint_2", "Tumor_only_timepoint_2"),
#                             c("Tumor_plus_PrfKO_NK_timepoint_2", "Tumor_only_timepoint_2"),
#                             c("Tumor_plus_WT_NK_timepoint_2", "Tumor_plus_IFNg_timepoint_2"),
#                             c("Tumor_plus_WT_NK_timepoint_2", "Tumor_plus_PrfKO_NK_timepoint_2"),
#                             c("Tumor_only_timepoint_0", "Tumor_only_timepoint_1"),
#                             c("Tumor_only_timepoint_0", "Tumor_only_timepoint_2"),
#                             c("Tumor_only_timepoint_1", "Tumor_only_timepoint_2"),
#                             c("Tumor_plus_WT_NK_timepoint_2", "Tumor_only_timepoint_1"))

list_of_comparisons <- list(c("Tumor_plus_WT_NK_timepoint_2", "Tumor_only_timepoint_2"),
                            c("Tumor_plus_IFNg_timepoint_2", "Tumor_only_timepoint_2"),
                            c("Tumor_plus_PrfKO_NK_timepoint_2", "Tumor_only_timepoint_2"))

experiment_name="nk_tum_immunoedit_complete"
output_dir <- "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/"

# Loading the data
# raw and filtered dds
precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData") #"nk_tum_immunoedit_complete_dds_objects.RData" 
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
# log2 and vsd transformed
precomputed_transf_objects_filename <- "nk_tum_immunoedit_complete_log2_vsd_filt_objects.RData"
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
# check if object loaded if not load
if(!exists(precomputed_objects_file)){
  load(precomputed_objects_file)
  load(precomputed_transf_objects_file)
} 

# defining parameters
abs_filt_samples=3
padj_cutoff = 0.05
log2FC_cutoff = 0.58 #(FC=1.5); log2FC=1.0 # (FC=2)
var_expl_needed <- 0.6         # at least 60% variance explained needed

experiment_subset <- c("EXP9.8") #we use only this experiment
if(exists("deg_design")) {rm(deg_design)}
deg_design <- as.formula("~ cell_line_label + condition_tp") #Experiment is missing here, since we have only EXP9.8

# Loading MsigDB geneset collections ----
gs_hallmark <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("H"), clean=TRUE) 
gs_C2_kegg <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C2"), subcategory = "CP:KEGG", clean=TRUE) 
gs_C5_GOBP <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:BP", clean=TRUE) 
gs_C5_GOCC <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:CC", clean=TRUE) 
gs_C5_GOMF <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:MF", clean=TRUE) 

genes_of_interest <- list("Denn2b"="ENSMUSG00000031024",
                          "Gbp6"="ENSMUSG00000104713",
                          "H2-K1"="ENSMUSG00000061232",
                          "Hs3st2"="ENSMUSG00000046321",
                          "Htra3"="ENSMUSG00000029096",
                          "Ifi27"="ENSMUSG00000064215",
                          "Igtp"="ENSMUSG00000078853",
                          "Irf1"="ENSMUSG00000018899",
                          "Lgals3bp"="ENSMUSG00000033880",
                          "Ly6a"="ENSMUSG00000075602",
                          "Plaat3"="ENSMUSG00000060675",
                          "Rtp4"="ENSMUSG00000033355",
                          "Serpina3f"="ENSMUSG00000066363",
                          "Stat1"="ENSMUSG00000026104")

genes_of_interest_exp9.8 <- list("H2-Eb1" = "ENSMUSG00000060586",
                                 "Slc16a3"= "ENSMUSG00000025161",
                                 "H2-Aa"  = "ENSMUSG00000036594",
                                 "Bnip3"  = "ENSMUSG00000078566",
                                 "Il1r2"  = "ENSMUSG00000026073",
                                 "Dapk2"  = "ENSMUSG00000032380",
                                 "Zfhx3"  = "ENSMUSG00000038872",
                                 "Dpp7"   = "ENSMUSG00000026958")


#specifically for the publication 
genes_of_interest_exp9.4_9.7 <- list('Ifi44'     ="ENSMUSG00000028037", 
                                     'Ccl5'      ="ENSMUSG00000035042",
                                     'Iigp1'     ="ENSMUSG00000054072",
                                     'Serpina3f' ="ENSMUSG00000066363",
                                     'Ly6a'      ="ENSMUSG00000075602",
                                     'Plaat3'    ="ENSMUSG00000060675",
                                     'Lgals3bp'  ="ENSMUSG00000033880",
                                     'Ifi27'     ="ENSMUSG00000064215",
                                     'Gbp6'      ="ENSMUSG00000104713",
                                     'Rtp4'      ="ENSMUSG00000033355",
                                     'Stat1'     ="ENSMUSG00000026104",
                                     'Tap1'      ="ENSMUSG00000037321",
                                     'H2-K1'     ="ENSMUSG00000061232",
                                     'B2m'       ="ENSMUSG00000060802",
                                     'Igtp'      ="ENSMUSG00000078853",
                                     'Gtf2ird1'  ="ENSMUSG00000023079",
                                     'Cdc42ep4'  ="ENSMUSG00000041598",
                                     'Hoxb6'     ="ENSMUSG00000000690",
                                     'Bfsp2'     ="ENSMUSG00000032556",
                                     'Sox6'      ="ENSMUSG00000051910",
                                     'Tgm2'      ="ENSMUSG00000037820",
                                     'Adprhl1'   ="ENSMUSG00000031448")
# this will be a counter 
cid <- 0

#Star of the loop
for (comparison_i in list_of_comparisons){
  cid <- cid + 1
  #determine the name of the comparison
  deg_name <- paste0(cid, comparison_i[1], comparison_i[2]) #"c1_Tumor_plus_WT_NK_timepoint_2_VS_Tumor_only_timepoint_2"
  deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
  dir.create(deg_dir)
  
  #this is the tow sample that we are going to compare between
  condition_tp_subset <- comparison_i
  
  # preparing dds and vsd objects for the main project comparison
  precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
  precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)
  
  precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
  precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)
  
  # just in case remove any existing subsets
  if(exists("dds_subset")) {rm(dds_subset)}
  if(exists("dds_subset_filt")) {rm(dds_subset_filt)}
  if(exists("vsd_subset")) {rm(vsd_subset)}
  
  if(!file.exists(precomputed_objects_deg_file)){
    
    cat("RNA objects file '", precomputed_objects_deg_filename, "' does not exist in the OUTPUT_DIR...\n")
    cat("generating", precomputed_objects_deg_filename, "\n")
    
    dds_subset <- dds_filt[ , (dds_filt$condition_tp %in% condition_tp_subset) & (dds_filt$experiment %in% experiment_subset)]  
    
    # Dropping levels
    dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
    table(dds_subset$condition_tp)
    dds_subset$experiment <- droplevels(dds_subset$experiment)
    table(dds_subset$experiment)
    dds_subset$experiment <- droplevels(dds_subset$experiment)
    dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
    
    # filtering lowly expressed genes
    dds_subset_filt <- filterDatasets(dds_subset, 
                                      abs_filt = TRUE, 
                                      abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
    
    design(dds_subset_filt) <- deg_design
    dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
    dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
    
    cat("saving...", precomputed_objects_file, "\n")
    cat("...into:", output_dir, "\n")
    save(dds_subset, dds_subset_filt, ensemblAnnot,  file = precomputed_objects_deg_file)
    
  } else{
    cat("RNA objects file '", precomputed_objects_deg_filename, "' exist...loading\n")
    load(precomputed_objects_deg_file)
  }
  
  #transformation after filtering
  if(!file.exists(precomputed_transf_objects_deg_file)){
    # transformation
    log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
    vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info; blind = TRUE for QC
    save(log2_norm_subset_filt, vsd_subset_filt,   
         file = precomputed_transf_objects_deg_file)
  } else{
    load(precomputed_transf_objects_deg_file)
  }
  
  rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previous runs
  key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
                         dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate) 
  
  key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)), 
                                                     strata = c("condition_tp"), 
                                                     data = key_metadata_subset)
  
  tableone::kableone(key_variables_tableOne$CatTable,
                     caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
                    kableExtra::kable_material(c("striped", "hover"))
  
  transf_object <- vsd_subset_filt
  
  #Before removing Batch Effect
  pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object, 
                                       cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
                                       color_variable = "condition_tp", 
                                       shape_variable = "cell_line_label",
                                       ntop_genes = 1000) +
                            ggtitle(paste(comparison_i, "Comparison. Dataset Before Batch Correction"))
  #plot(pca_deg_plotCellLabel)
  
  # Remove batch effect (only for the cell_line_label)
  transf_batch_NObatch_cell_label <- vsd_subset_filt
  transf_batch_NObatch_cell_label_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_cell_label), 
                                                                    transf_batch_NObatch_cell_label$cell_line_label)
  SummarizedExperiment::assay(transf_batch_NObatch_cell_label) <- transf_batch_NObatch_cell_label_count
  
  pca_deg_NObatch_cell_label <- generatePCA(transf_object = transf_batch_NObatch_cell_label, 
                                            cond_interest_varPart = c("condition_tp", "cell_line_label"), 
                                            color_variable = "condition_tp", 
                                            shape_variable = "cell_line_label",
                                            ntop_genes = 1000) +
                            ggtitle(paste(comparison_i, "Comparison. Removed batchEffect - cell_line"))
  #plot(pca_deg_NObatch_cell_label)
  
  PCA_batch_highlightCellLines_merged <- ggpubr::ggarrange(pca_deg_plotCellLabel, 
                                                           pca_deg_NObatch_cell_label, 
                                                           common.legend = TRUE)
  
  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "batch_correction.png"), 
         plot=PCA_batch_highlightCellLines_merged,
         width = 40, height = 20, units = "cm")

  
  #Generate results
  deg_comparison_results <- generateResults_upd(dds_object = dds_subset_filt,
                                                          coeff_name = resultsNames(dds_subset_filt)[3],
                                                          cond_numerator = comparison_i[1],
                                                          cond_denominator = comparison_i[2],
                                                          cond_variable="condition_tp")
  #export it as xlsx table
  openxlsx::write.xlsx(deg_comparison_results, file = file.path(deg_dir, paste0("deg", comparison_i[1], '_vs_', comparison_i[2], "_results.xlsx")))

  #generating GOBP GOCC GOMF
  deg_comparison_results_enrichment_C5_GOBP <- hypeR::hypeR(signature = deg_comparison_results$results_signif$mgi_symbol,
                                                            genesets = gs_C5_GOBP,
                                                            test="hypergeometric",
                                                            background=nrow(dds_subset_filt))
  deg_comparison_results_enrichment_C5_GOCC <- hypeR::hypeR(signature = deg_comparison_results$results_signif$mgi_symbol,
                                                            genesets = gs_C5_GOCC,
                                                            test="hypergeometric",
                                                            background=nrow(dds_subset_filt))
  deg_comparison_results_enrichment_C5_GOMF <- hypeR::hypeR(signature = deg_comparison_results$results_signif$mgi_symbol,
                                                            genesets = gs_C5_GOMF,
                                                            test="hypergeometric",
                                                            background=nrow(dds_subset_filt))

  # hypeR::hyp_show(hyp_obj$data$clA, simple = FALSE)
  hypeR::hyp_to_excel(deg_comparison_results_enrichment_C5_GOBP, file_path=file.path(deg_dir, paste0(comparison_i[1], comparison_i[2],"_enrichment_C5_GOBP.xlsx")))
  hypeR::hyp_to_excel(deg_comparison_results_enrichment_C5_GOCC, file_path=file.path(deg_dir, paste0(comparison_i[1], comparison_i[2],"_enrichment_C5_GOCC.xlsx")))
  hypeR::hyp_to_excel(deg_comparison_results_enrichment_C5_GOMF, file_path=file.path(deg_dir, paste0(comparison_i[1], comparison_i[2],"_enrichment_C5_GOMF.xlsx")))
  # plotting enrichment results ----
  deg_comparison_results_enrichment_C5_GOBP_plot <- hypeR::hyp_dots(deg_comparison_results_enrichment_C5_GOBP, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title=paste0("GOBP: ",comparison_i[1], comparison_i[2]))
  deg_comparison_results_enrichment_C5_GOCC_plot <- hypeR::hyp_dots(deg_comparison_results_enrichment_C5_GOCC, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title=paste0("GOCC: ",comparison_i[1], comparison_i[2]))
  deg_comparison_results_enrichment_C5_GOMF_plot <- hypeR::hyp_dots(deg_comparison_results_enrichment_C5_GOMF, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title=paste0("GOMF: ",comparison_i[1], comparison_i[2]))

  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_GOBP_plot.png"),
         plot=deg_comparison_results_enrichment_C5_GOBP_plot,
         width = 20, height = 20, units = "cm")

  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_GOCC_plot.png"),
         plot=deg_comparison_results_enrichment_C5_GOCC_plot,
         width = 20, height = 20, units = "cm")

  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_GOMF_plot.png"),
         plot=deg_comparison_results_enrichment_C5_GOMF_plot,
         width = 20, height = 20, units = "cm")
  
  # volcano plot
  deg_comparison_results_volcano_plot <- plotVolcano_repel(dds_results_obj = deg_comparison_results$results_all,
                                                    genes_of_interest = genes_of_interest,
                                                    genes_of_interest2 = genes_of_interest_exp9.8,
                                                    plot_title = paste0(comparison_i[1], ' vs ', comparison_i[2]),
                                                    max_overlaps = 30)

  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_volcano_plot.png"),
         plot=deg_comparison_results_volcano_plot,
         width = 20, height = 20, units = "cm")

  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_volcano_plot.eps"),
         plot=deg_comparison_results_volcano_plot,
         width = 20, height = 20, units = "cm")
  
  print(deg_comparison_results_volcano_plot)

  # heatmap and heatmap batch corrected - corrected
  
  save_pheatmap_pdf <- function(x, filename, width=7, height=7) {
    stopifnot(!missing(x))
    stopifnot(!missing(filename))
    pdf(filename, width=width, height=height)
    grid::grid.newpage()
    grid::grid.draw(x$gtable)
    dev.off()
  }
  
  metadata_heatmap <- as.data.frame(colData(dds_subset))
  heatmap_counts <- SummarizedExperiment::assay(transf_object)
  heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_cell_label) # removed xperiment batch effects! ? use experimen+cell_line removed???
  heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_comparison_results$results_signif$ensembl_id, ]
  heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_comparison_results$results_signif$ensembl_id, ]
  annotation_col <- metadata_heatmap %>%
    dplyr::select(cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
    dplyr::arrange(condition_tp, cell_line_label)
  heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
  heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
  ensembl2symbol_annot <- deg_comparison_results$results_all %>%
    dplyr::select(ensembl_id, mgi_symbol)

 

  color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use

  ann_colors = list(
    experiment = c(EXP9.8 = "grey")
  )

  # ,                  filename = paste0(deg_TpNK_tp2_vs_Tonly_tp1_DIR, "tp2_vs_tp1_inclEXP9_7_heatmap_intersect_54genes.pdf")
  heatmap_result <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
                                       main = "Heatmap of signif. DEG - removed cell line BatchEffect",
                                       scale = "row",
                                       annotation_col = annotation_col,
                                       annotation_colors = ann_colors,
                                       #annotation_row = row_annot_symbols,
                                       show_colnames = FALSE,
                                       show_rownames = FALSE,
                                       cluster_cols = FALSE,
                                       #cluster_rows = counts_deg_ord_row_cor_hclust,
                                       color = color.scheme,
                                       fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
  )
  save_pheatmap_pdf(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap_batch_eff_removed.pdf"),
                    x=heatmap_result,
                    width = 10, height = 10)
  
  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap_batch_eff_removed.png"),
         plot=heatmap_result,
         width = 10, height = 10, units = "in")

  #Heatmap not corrected for the batch effect
  heatmap_result <- pheatmap::pheatmap(heatmap_counts_deg_ord,
                                       main = "Heatmap of signif. DEG",
                                       scale = "row",
                                       annotation_col = annotation_col,
                                       annotation_colors = ann_colors,
                                       #annotation_row = row_annot_symbols,
                                       show_colnames = FALSE,
                                       show_rownames = FALSE,
                                       cluster_cols = FALSE,
                                       #cluster_rows = counts_deg_ord_row_cor_hclust,
                                       color = color.scheme,
                                       fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
  )
  save_pheatmap_pdf(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap.pdf"),
                    x=heatmap_result,
                    width = 10, height = 10)
  
  ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap.png"),
         plot=heatmap_result,
         width = 10, height = 10, units = "in")
  
}                            
## RNA objects file ' 1Tumor_plus_WT_NK_timepoint_2Tumor_only_timepoint_2_dds_objects.RData ' exist...loading

## RNA objects file ' 2Tumor_plus_IFNg_timepoint_2Tumor_only_timepoint_2_dds_objects.RData ' exist...loading

## RNA objects file ' 3Tumor_plus_PrfKO_NK_timepoint_2Tumor_only_timepoint_2_dds_objects.RData ' exist...loading

Plotting the Venn Diagramm for the comparisons:

| Deg | Group 1 | Group 2 |

| — | ——————————- | ——————————- |

| 1 | Tumor_plus_WT_NK_timepoint_2 | Tumor_only_timepoint_2 |

| 2 | Tumor_plus_IFNg_timepoint_2 | Tumor_only_timepoint_2 |

| 3 | Tumor_plus_PrfKO_NK_timepoint_2 | Tumor_only_timepoint_2 |

Deg1_filename <-  paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_1Tumor_plus_WT_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_WT_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg2_filename <-  paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_2Tumor_plus_IFNg_timepoint_2Tumor_only_timepoint_2/degTumor_plus_IFNg_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg3_filename <-  paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_3Tumor_plus_PrfKO_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_PrfKO_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")

Deg1 <- openxlsx::read.xlsx(Deg1_filename,sheet = 'results_signif')
Deg1_ens <- Deg1[,c('mgi_symbol', 'ensembl_id')]
Deg1 <- Deg1$mgi_symbol

Deg2 <- openxlsx::read.xlsx(Deg2_filename,sheet = 'results_signif')
Deg2_ens <- Deg2[, c('mgi_symbol', 'ensembl_id')]
Deg2 <- Deg2$mgi_symbol

Deg3 <- openxlsx::read.xlsx(Deg3_filename,sheet = 'results_signif')
Deg3_ens <- Deg3[, c('mgi_symbol', 'ensembl_id')]
Deg3 <- Deg3$mgi_symbol

Deg_list <-  list("Tmr+WT_NK TP2 vs T only TP2" = Deg1,
                  "Tmr+IFNg TP2 vs T only TP2" = Deg2, 
                  "Tmr+PrfKO NK TP2 vs T only TP2" = Deg3)

library(ggvenn)

ven_diag <- ggvenn(Deg_list)
ven_diag

#order the genes for the further Heatmap
Deg12_intersect <- intersect(Deg1, Deg2)
Deg123_intersect <- intersect(Deg12_intersect, Deg3)
Deg23_intersect <- intersect(Deg2, Deg3)
Deg31_intersect <- intersect(Deg3, Deg1)

Deg12_intersect <- Deg12_intersect[!(Deg12_intersect %in% Deg123_intersect)]
Deg23_intersect <- Deg23_intersect[!(Deg23_intersect %in% Deg123_intersect)]
Deg31_intersect <- Deg31_intersect[!(Deg31_intersect %in% Deg123_intersect)]

Deg1_unique <- Deg1[!(Deg1 %in% Deg12_intersect | 
                      Deg1 %in% Deg31_intersect |
                      Deg1 %in% Deg123_intersect)]

Deg2_unique <- Deg2[!(Deg2 %in% Deg12_intersect |
                      Deg2 %in% Deg23_intersect |
                      Deg2 %in% Deg123_intersect)]

Deg3_unique <- Deg3[!(Deg3 %in% Deg31_intersect |
                      Deg3 %in% Deg23_intersect |
                      Deg3 %in% Deg123_intersect)]

List_of_genes_ordered <- c(Deg31_intersect,
                           Deg1_unique,
                           Deg12_intersect,
                           Deg2_unique,
                           Deg23_intersect,
                           Deg3_unique,
                           Deg123_intersect)

Deg_ens <- rbind(Deg1_ens, Deg2_ens, Deg3_ens)
Deg_ens <- unique(Deg_ens)

Deg_ens_ord <- Deg_ens[match(List_of_genes_ordered, Deg_ens$mgi_symbol),]
#These are the data that will be used for the heatmap

list_of_comparisons_venn_heatmap <- c("Tumor_plus_WT_NK_timepoint_2", 
                                      "Tumor_plus_IFNg_timepoint_2", 
                                      "Tumor_plus_PrfKO_NK_timepoint_2", 
                                      "Tumor_only_timepoint_2")
experiment_subset <- c("EXP9.8")

dds_subset_venn_heatmap <- dds_filt[ , (dds_filt$condition_tp %in% list_of_comparisons_venn_heatmap) & (dds_filt$experiment %in% experiment_subset)]  
metadata_heatmap <- as.data.frame(colData(dds_subset_venn_heatmap))
heatmap_counts <- SummarizedExperiment::assay(dds_subset_venn_heatmap)
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% Deg_ens_ord$ensembl_id,]
annotation_col <- metadata_heatmap %>%
  dplyr::select(cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
  dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_deg_ord <- heatmap_counts_deg_ord[match(Deg_ens_ord$ensembl_id, rownames(heatmap_counts_deg_ord)),]
rownames(heatmap_counts_deg_ord) == Deg_ens_ord$ensembl_id
##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [27] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [53] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [79] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [105] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [131] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [183] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [209] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [235] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [261] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [287] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [313] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [339] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [365] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [391] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [417] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [443] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [469] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [495] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [521] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [547] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [573] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [599] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [625] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [651] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [677] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [703] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [729] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
rownames(heatmap_counts_deg_ord) <- Deg_ens_ord$mgi_symbol

color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
  experiment = c(EXP9.4 = "#4682B4", EXP9.5 = "#7846B4", EXP9.6 = "#B47846", EXP9.7 = "#82B446", EXP9.8 = "grey")
)

heatmap_result <- pheatmap::pheatmap(heatmap_counts_deg_ord,
                                     main = "Heatmap of signif. DEG",
                                     scale = "row",
                                     annotation_col = annotation_col,
                                     annotation_colors = ann_colors,
                                     #annotation_row = row_annot_symbols,
                                     show_colnames = FALSE,
                                     show_rownames = TRUE,
                                     cluster_cols = FALSE,
                                     cluster_rows = T,
                                     annotation_names_row = TRUE,
                                     #cluster_rows = counts_deg_ord_row_cor_hclust,
                                     color = color.scheme,
                                     fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
)

# This heatmap doesn't look so great.

# save the DEG list:
List_of_genes_ordered_forxlsx <- list("Deg1_T+WT_NK_TP2_VS_To_TP2" = Deg1,
                                      "Deg2_T+IFNg_TP2_VS_To_TP2" = Deg2,
                                      "Deg3_T+PrfKO_NK_TP2_VS_To_TP2" = Deg3,
                                      "Deg31_exlusive_intersect" = Deg31_intersect,
                                      "Deg1_unique" = Deg1_unique,
                                      "Deg12_exlusive_intersect" = Deg12_intersect,
                                      "Deg2_unique" = Deg2_unique,
                                      "Deg23_exlusive_intersect" = Deg23_intersect,
                                      "Deg3_unique" = Deg3_unique,
                                      "Deg123_intersect" = Deg123_intersect)

openxlsx::write.xlsx(List_of_genes_ordered_forxlsx, 
                     "~/workspace/results_dir/nk_tum_immunoedit_complete/overlaping_genes_deg_1_2_3.xlsx")

# adding supplementary information (p-val, L2FC, etc for genes)
# read the DEG files again
Deg1_xlsx <- openxlsx::read.xlsx(Deg1_filename,sheet = 'results_signif')
Deg1_xlsx <- Deg1_xlsx %>% select("ensembl_id", "mgi_symbol", "mgi_description",  "log2FoldChange", "padj")

Deg2_xlsx <- openxlsx::read.xlsx(Deg2_filename,sheet = 'results_signif')
Deg2_xlsx <- Deg2_xlsx %>% select("ensembl_id", "mgi_symbol", "mgi_description",   "log2FoldChange",  "padj")

Deg3_xlsx <- openxlsx::read.xlsx(Deg3_filename,sheet = 'results_signif')
Deg3_xlsx <- Deg3_xlsx %>% select("ensembl_id", "mgi_symbol", "mgi_description",  "log2FoldChange", "padj")


List_of_genes_ordered_forxlsx_sup <- List_of_genes_ordered_forxlsx

List_of_genes_ordered_forxlsx_sup$`Deg1_T+WT_NK_TP2_VS_To_TP2` <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$`Deg1_T+WT_NK_TP2_VS_To_TP2`, Deg1_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$`Deg2_T+IFNg_TP2_VS_To_TP2` <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$`Deg2_T+IFNg_TP2_VS_To_TP2`, Deg2_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$`Deg3_T+PrfKO_NK_TP2_VS_To_TP2` <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$`Deg3_T+PrfKO_NK_TP2_VS_To_TP2`, Deg3_xlsx$mgi_symbol),]

List_of_genes_ordered_forxlsx_sup$Deg1_unique <-  Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg1_unique, Deg1_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$Deg2_unique <-  Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg2_unique, Deg2_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$Deg3_unique <-  Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg3_unique, Deg3_xlsx$mgi_symbol),]

Deg31_excl <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg31_exlusive_intersect, Deg3_xlsx$mgi_symbol),] %>%
              rename("DEG3_log2FoldChange" = "log2FoldChange", "DEG3_padj" = "padj")
Deg13_excl <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg31_exlusive_intersect, Deg1_xlsx$mgi_symbol),] %>%
  select("log2FoldChange", "padj") %>%
  rename("DEG1_log2FoldChange" = "log2FoldChange", "DEG1_padj" = "padj")
List_of_genes_ordered_forxlsx_sup$Deg31_exlusive_intersect <- cbind(Deg31_excl, Deg13_excl)

Deg21_excl <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect, Deg2_xlsx$mgi_symbol),] %>%
  rename("DEG2_log2FoldChange" = "log2FoldChange", "DEG2_padj" = "padj")
Deg12_excl <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect, Deg1_xlsx$mgi_symbol),] %>%
  select("log2FoldChange", "padj") %>%
  rename("DEG1_log2FoldChange" = "log2FoldChange", "DEG1_padj" = "padj")
List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect <- cbind(Deg21_excl, Deg12_excl)

Deg23_excl <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg23_exlusive_intersect, Deg2_xlsx$mgi_symbol),] %>%
  rename("DEG2_log2FoldChange" = "log2FoldChange", "DEG2_padj" = "padj")
Deg32_excl <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg23_exlusive_intersect, Deg3_xlsx$mgi_symbol),] %>%
  select("log2FoldChange", "padj") %>%
  rename("DEG3_log2FoldChange" = "log2FoldChange", "DEG3_padj" = "padj")
List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect <- cbind(Deg23_excl, Deg32_excl)


Deg123_1 <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg123_intersect, Deg1_xlsx$mgi_symbol),] %>%
  rename("DEG1_log2FoldChange" = "log2FoldChange", "DEG1_padj" = "padj")

Deg123_2 <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg123_intersect, Deg2_xlsx$mgi_symbol),] %>%
  select("log2FoldChange", "padj") %>%
  rename("DEG2_log2FoldChange" = "log2FoldChange", "DEG2_padj" = "padj")

Deg123_3 <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg123_intersect, Deg3_xlsx$mgi_symbol),] %>%
  select("log2FoldChange", "padj") %>%
  rename("DEG3_log2FoldChange" = "log2FoldChange", "DEG3_padj" = "padj")

List_of_genes_ordered_forxlsx_sup$Deg123_intersect <- cbind(Deg123_1, Deg123_2, Deg123_3)


openxlsx::write.xlsx(List_of_genes_ordered_forxlsx_sup, 
                     "~/workspace/results_dir/nk_tum_immunoedit_complete/overlaping_genes_deg_1_2_3_with_extra.xlsx")
Plots for the publication:

Plotting the PCA plot of the 4 groups

#Tumor_plus_WT_NK_timepoint_2 Tumor_plus_IFNg_timepoint_2 Tumor_plus_PrfKO_NK_timepoint_2 Tumor_only_timepoint_2

save_dir <- "~/workspace/results_dir/EXP9.8_publication/"
dir.create(save_dir)

condition_tp_subset <- c("Tumor_plus_WT_NK_timepoint_2",  
                         "Tumor_plus_IFNg_timepoint_2", 
                         "Tumor_plus_PrfKO_NK_timepoint_2", 
                         "Tumor_only_timepoint_2")

dds_subset <- dds_filt[ , (dds_filt$condition_tp %in% condition_tp_subset) & (dds_filt$experiment %in% experiment_subset)]  

# Dropping levels
dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
table(dds_subset$condition_tp)
## 
##          Tumor_only_timepoint_2    Tumor_plus_WT_NK_timepoint_2     Tumor_plus_IFNg_timepoint_2 Tumor_plus_PrfKO_NK_timepoint_2 
##                               6                               6                               6                               6
dds_subset$experiment <- droplevels(dds_subset$experiment)
table(dds_subset$experiment)
## 
## EXP9.8 
##     24
dds_subset$experiment <- droplevels(dds_subset$experiment)
dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)

# filtering lowly expressed genes
dds_subset_filt <- filterDatasets(dds_subset, 
                                  abs_filt = TRUE, 
                                  abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
## Original dds object samples:  24  genes:  15430 
## Minimum number of samples with expression: 3 
## Number of filtered genes: 3228 
## Filtered dds object has samples: 24 genes: 12202
design(dds_subset_filt) <- deg_design
dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info;
rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previous runs
key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
  dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate) 

key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)), 
                                                   strata = c("condition_tp"), 
                                                   data = key_metadata_subset)

tableone::kableone(key_variables_tableOne$CatTable,
                   caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
  kableExtra::kable_material(c("striped", "hover"))
Overview of number of samples in different categories (experiment, condition,…).
Tumor_only_timepoint_2 Tumor_plus_WT_NK_timepoint_2 Tumor_plus_IFNg_timepoint_2 Tumor_plus_PrfKO_NK_timepoint_2 p test
n 6 6 6 6
experiment = EXP9.8 (%) 6 (100.0) 6 (100.0) 6 (100.0) 6 (100.0) NA
cell_line_label = D (%) 3 ( 50.0) 3 ( 50.0) 3 ( 50.0) 3 ( 50.0) 1.000
technical_replicate (%) 1.000
1 2 ( 33.3) 2 ( 33.3) 2 ( 33.3) 2 ( 33.3)
2 2 ( 33.3) 2 ( 33.3) 2 ( 33.3) 2 ( 33.3)
3 2 ( 33.3) 2 ( 33.3) 2 ( 33.3) 2 ( 33.3)
transf_object <- vsd_subset_filt

#Before removing Batch Effect
pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object[,transf_object$cell_line_label == "A"], 
                                     cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
                                     color_variable = "condition_tp", 
                                     shape_variable = "cell_line_label",
                                     ntop_genes = 1000) +
  ggtitle(paste( "PCA plot. EXP9.8 Cell line A")) +
  scale_color_manual(values = c("#DD3344" ,"#553388", "#A3E7FC", "#26C485"))
plot(pca_deg_plotCellLabel)

ggsave(filename = file.path(save_dir, "PCA_Plot_all_conditions_A.png"), pca_deg_plotCellLabel,
                            width = 20, height = 14, units = "cm")

pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object[,transf_object$cell_line_label == "D"], 
                                     cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"), 
                                     color_variable = "condition_tp", 
                                     shape_variable = "cell_line_label",
                                     ntop_genes = 1000) +
  ggtitle(paste( "PCA plot. EXP9.8 Cell line D")) +
  scale_color_manual(values = c("#DD3344" ,"#553388", "#A3E7FC", "#26C485"))+
  scale_shape_manual(values = 17)
plot(pca_deg_plotCellLabel)

ggsave(filename = file.path(save_dir, "PCA_Plot_all_conditions_D.png"), pca_deg_plotCellLabel,
                            width = 20, height = 14, units = "cm")

# Remove batch effect (only for the cell_line_label)
transf_batch_NObatch_cell_label <- vsd_subset_filt
transf_batch_NObatch_cell_label_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_cell_label), 
                                                                  transf_batch_NObatch_cell_label$cell_line_label)
SummarizedExperiment::assay(transf_batch_NObatch_cell_label) <- transf_batch_NObatch_cell_label_count

pca_deg_NObatch_cell_label <- generatePCA(transf_object = transf_batch_NObatch_cell_label, 
                                          cond_interest_varPart = c("condition_tp", "cell_line_label"), 
                                          color_variable = "condition_tp", 
                                          shape_variable = "cell_line_label",
                                          ntop_genes = 1000) +
  scale_color_manual(values = c("#DD3344" ,"#553388", "#A3E7FC", "#26C485"))+
  ggtitle(paste( "PCA plot. EXP9.8 Batch corrected for cell_line"))
plot(pca_deg_NObatch_cell_label)

ggsave(filename = file.path(save_dir, "PCA_Plot_all_conditions_batch_corrected_cell_lines.png"), pca_deg_NObatch_cell_label,
                            width = 20, height = 14, units = "cm")


##### Venn diagramms

Deg1_filename <-  paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_1Tumor_plus_WT_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_WT_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg2_filename <-  paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_2Tumor_plus_IFNg_timepoint_2Tumor_only_timepoint_2/degTumor_plus_IFNg_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg3_filename <-  paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_3Tumor_plus_PrfKO_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_PrfKO_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")



Deg1 <- openxlsx::read.xlsx(Deg1_filename,sheet = 'results_signif')
Deg1_pos <- Deg1[Deg1$log2FoldChange > 0, c('mgi_symbol')  ]
Deg1_neg <- Deg1[Deg1$log2FoldChange < 0, c('mgi_symbol')  ]

Deg2 <- openxlsx::read.xlsx(Deg2_filename,sheet = 'results_signif')
Deg2_pos <- Deg2[Deg2$log2FoldChange > 0, c('mgi_symbol')  ]
Deg2_neg <- Deg2[Deg2$log2FoldChange < 0, c('mgi_symbol')  ]
Deg2_neg <- Deg2_neg[Deg2_neg!=""]

Deg3 <- openxlsx::read.xlsx(Deg3_filename,sheet = 'results_signif')
Deg3_pos <- Deg3[Deg3$log2FoldChange > 0, c('mgi_symbol')  ]
Deg3_neg <- Deg3[Deg3$log2FoldChange < 0, c('mgi_symbol')  ]
Deg3_neg <- Deg3_neg[Deg3_neg!=""]

Deg_list_pos <-  list("Tmr+WT_NK TP2 vs T only TP2" = Deg1_pos,
                  "Tmr+IFNg TP2 vs T only TP2" = Deg2_pos, 
                  "Tmr+PrfKO NK TP2 vs T only TP2" = Deg3_pos)

Deg_list_neg <-  list("Tmr+WT_NK TP2 vs T only TP2" = Deg1_neg,
                      "Tmr+IFNg TP2 vs T only TP2" = Deg2_neg, 
                      "Tmr+PrfKO NK TP2 vs T only TP2" = Deg3_neg)
library(ggvenn)

ven_diag <- ggvenn(Deg_list_pos,fill_color = c("#553388", "#A3E7FC", "#26C485"))
ven_diag

ggsave(filename = file.path(save_dir, "venn_diag_positive_up_reg.png"), ven_diag,
       width = 20, height = 20, units = "cm")

ven_diag <- ggvenn(Deg_list_neg,fill_color = c("#553388", "#A3E7FC", "#26C485"))
ven_diag

ggsave(filename = file.path(save_dir, "venn_diag_positive_down_reg.png"), ven_diag,
       width = 20, height = 20, units = "cm")


Deg12_intersect_pos <- intersect(Deg1_pos, Deg2_pos)
Deg123_intersect_pos <- intersect(Deg12_intersect_pos, Deg3_pos)
Deg23_intersect_pos <- intersect(Deg2_pos, Deg3_pos)
Deg31_intersect_pos <- intersect(Deg3_pos, Deg1_pos)

Deg12_intersect_pos_scecif <- setdiff(Deg12_intersect_pos, Deg123_intersect_pos)
Deg23_intersect_pos_specif <- setdiff(Deg23_intersect_pos, Deg123_intersect_pos)
Deg31_intersect_pos_specif <- setdiff(Deg31_intersect_pos, Deg123_intersect_pos)

setdiff(Deg1_pos, c(Deg2_pos, Deg3_pos))
##  [1] "Bnip3"   "Aldoa"   "Pgk1"    "Tpi1"    "Ero1l"   "Pkm"     "Ddit4"   "Pdk1"    "Bnip3l"  "Pgm1"    "Fam162a" "Ulk3"    "Pfkl"   
## [14] "Rasal3"  "Pfkp"    "Ankrd37" "Kdm3a"   "Map4"    "Slc37a4" "Tgm2"    "Trpc4"   "Zfp810"  "Cpne5"   "Cox7a1"  "Egln1"   "Ak4"    
## [27] "Aldoc"   "Hsd11b1" "Zfp62"   "Crlf1"   "Mgarp"   "Ropn1l"  "Cd244a"  "Gstm4"   "Ercc1"   "Izumo4"
setdiff(Deg2_pos, c(Deg1_pos, Deg3_pos))
##   [1] "Ifi47"         "Tgtp2"         "Gbp7"          "Gbp4"          "Gbp5"          "Irgm2"         "Tap1"          "H2-K1"        
##   [9] "Jun"           "Socs1"         "Serpina3f"     "Gm12250"       "Irgm1"         "B2m"           "Tapbpl"        "Batf2"        
##  [17] "Cd1d1"         "9330175E14Rik" "F830016B08Rik" "Psmb8"         "Serpina3g"     "Gbp3"          "Parp11"        "Gm12216"      
##  [25] "Irf9"          "AW112010"      "Psmb10"        "Ppa1"          "Nlrc5"         "Wars"          "Lap3"          "Ifitm3"       
##  [33] "Gm43302"       "Parp9"         "Dgcr6"         "Il18bp"        "Ube2l6"        "Trafd1"        "Tapbp"         "Tmem140"      
##  [41] "Psap"          "Nampt"         "Casp1"         "Trim12c"       "Tfrc"          "Rnf19b"        "Gbp2b"         "Ly6c1"        
##  [49] "Isg20"         "Ctss"          "Coa5"          "Gpx1"          "Slfn2"         "Ifi44"         "Psme1"         "Dtx3l"        
##  [57] "Psmg4"         "Bst2"          "Gimap4"        "P2ry14"        "Ifitm1"        "Isg15"         "Tnfrsf26"      "Ffar2"        
##  [65] "Prm1"          "H2-D1"         "Sp110"         "Gm1966"        "Psme2"         "Trim30a"       "Idnk"          "Mpeg1"        
##  [73] "Pecam1"        "Ifngr2"        "Ccr2"          "Tnfsf10"       "Prodh"         "Gm4486"        "Ifi35"         "Itm2b"        
##  [81] "Csf1"          "Psmb9"         "Samhd1"        "Usp18"         "Ogfr"          "Fas"           "Ifi209"        "Rnf114"       
##  [89] "Ly6c2"         "Oasl2"         "Snx10"         "Cd274"         "Cfp"           "Rsad2"         "Plac8"         "Psme2b"       
##  [97] "Guca1a"        "Klf6"          "Tmem33"        "Erap1"         "H2-T22"        "Ctsc"          "Aida"          "Cask"         
## [105] "Ms4a6c"        "Myof"          "Rmdn3"         "BC051226"      "Cemip2"        "Nod1"          "Gm12840"       "Xaf1"         
## [113] "H2-Q5"         "Tgtp1"         "Phc2"          "Parp12"        "Reps1"         "Oas3"          "Mapk9"         "Rnase6"       
## [121] "Glrx"          "Vwa5a"         "Il2rg"         "Lgals3"        "Ppp2r5b"       "1600014C10Rik" "Ly6d"          "Rsbn1l"       
## [129] "Apol7b"        "Nuak2"         "Dio2"          "Ciita"         "Olfr753-ps1"   "Fcgr3"         "Nmi"           "Snhg15"       
## [137] "Tmtc4"         "Apex2"         "Trim34a"       "Dkk3"          "Gm20627"       "Ica1"          "Prdm1"         "Serpinb6a"    
## [145] "Ifit1"         "Cebpb"         "Tmco4"         "Cep89"         "Ccl9"          "Stat3"         "Gm6545"        "Plscr1"       
## [153] "Mmp13"         "Sema3g"        "H2-DMb2"       "Piwil2"        "Uba7"          "Ccrl2"         "Chpf2"         "Rasa4"        
## [161] "Gda"           "Arhgap25"      "Lax1"          "Ifit1bl1"      "Parp14"        "Irf7"          "Gm49339"       "Selenow"      
## [169] "Tusc1"         "Ryr2"          "Atp8a1"        "Sco1"          "Gm48161"       "Fbn1"          "Rgs1"          "Pon3"         
## [177] "Marchf5"       "Znrd2"         "Srxn1"         "Tspan2"        "Gpr18"         "Tmem50b"       "Adar"          "Gm50255"      
## [185] "Osm"           "Phf11a"        "Gm10076"       "Shb"           "Il12rb1"       "Trim12a"       "H2-T23"        "Igf1"         
## [193] "Pgf"           "Wsb2"          "Ifi203"        "Ifi213"        "Scly"          "Usf1"          "Bcl3"          "Alcam"        
## [201] "Sh2d5"         "Timm21"        "Cd36"          "Vsir"          "Snhg5"         "Cmpk2"         "Minpp1"        "Tmem138"      
## [209] "Traf5"         "Creb3"         "Smg8"          "Mat2b"         "Sesn3"         "Il15ra"        "Ly6i"          "Rpl17-ps8"    
## [217] "Ifi206"        "Themis2"       "Tmem86a"       "Parp10"        "Gpatch11"      "Zbtb32"        "Oas1a"         "Zfp422"       
## [225] "Serpini1"      "Dhx58"         "Cxcr3"         "BC147527"      "Ifi211"        "Ndufc1"        "Hck"           "Gm16701"      
## [233] "Slc11a1"       "Prkaa2"        "Lsm8"          "Aopep"         "Snx14"         "Inpp4a"        "Hpgds"         "Calhm6"       
## [241] "Zfp846"        "Rnf213"        "Trim21"        "Gpm6a"         "Slc35e2"       "Ssbp2"         "Abtb2"         "Ssbp1"        
## [249] "Marchf1"       "Mtf1"          "Rps18-ps6"     "H2-T24"        "Bpnt1"         "Nt5e"          "Polr1e"        "Coq10b"       
## [257] "Cog1"          "Rangrf"        "Htatip2"       "Dnase1l3"      "Slc22a5"       "Trmt1l"        "Slc23a2"       "H2-DMb1"      
## [265] "Parp3"         "Mettl17"       "Lyrm1"         "Igkc"          "Rhbdf2"        "Slc30a4"       "9930104L06Rik" "Ccdc146"      
## [273] "Gosr1"         "Tmem106a"      "Abhd15"        "Cpne8"         "Gm24265"       "Erlin1"        "Endod1"        "Vps51"        
## [281] "Nfkb1"         "Aicda"         "Slc26a6"
setdiff(Deg3_pos, c(Deg1_pos, Deg2_pos))
##  [1] "Rnaseh2a" "Dmrta2"   "Tstd3"    "Fastkd2"  "Rad52"    "Cox6a2"   "Dpf1"     "Tnnt2"    "Abcb4"    "Cd28"     "Pmel"     "Ltk"     
## [13] "Zfp113"   "Poli"     "H2bc3"    "Mapk12"
Deg12_intersect_neg <- intersect(Deg1_neg, Deg2_neg)
Deg123_intersect_neg <- intersect(Deg12_intersect_neg, Deg3_neg)
Deg23_intersect_neg <- intersect(Deg2_neg, Deg3_neg)
Deg31_intersect_neg <- intersect(Deg3_neg, Deg1_neg)


Deg12_intersect_neg_scecif <- setdiff(Deg12_intersect_neg, Deg123_intersect_neg)
Deg23_intersect_neg_specif <- setdiff(Deg23_intersect_neg, Deg123_intersect_neg)
Deg31_intersect_neg_specif <- setdiff(Deg31_intersect_neg, Deg123_intersect_neg)

setdiff(Deg1_neg, c(Deg2_neg, Deg3_neg))
##  [1] "Phip"          "Ighg2b"        "Zfp709"        "Tlk2"          "H2ac20"        "Lbhd1"         "Pogk"          "Gm20186"      
##  [9] "Rgs10"         "1300002E11Rik" "R3hdm2"        "Gng4"          "Rabgap1l"      "Aicda"         "Smcr8"         "Tcf7l1"       
## [17] "Ggps1"         "Stag2"         "2700038G22Rik" "Marcks"        "Smpd3"         "Gas7"          "4930523C07Rik" "Samd8"        
## [25] "Ago1"          "Ik"            "Rpph1"
setdiff(Deg2_neg, c(Deg1_neg, Deg3_neg))
##   [1] "Cdh1"          "Ckap4"         "St3gal5"       "Zcchc24"       "Txnrd3"        "Neurl1b"       "Pygm"          "Dipk1b"       
##   [9] "Cd22"          "Grb7"          "Crhbp"         "Prkcb"         "Mki67"         "Lta"           "Strbp"         "Cd248"        
##  [17] "Actn1"         "Klhdc2"        "Trp53i11"      "Ramp1"         "Itih5"         "Nsg2"          "Mgll"          "Mier3"        
##  [25] "Ndrg1"         "Adgrl1"        "Tac4"          "Nuak1"         "Tyro3"         "Lat"           "Scube3"        "Ptgs1"        
##  [33] "Nedd9"         "Coro7"         "Otub2"         "Kifc5b"        "Ptp4a3"        "Dgkd"          "Tox"           "Gm36120"      
##  [41] "Vangl2"        "Cd37"          "Endou"         "Atad5"         "Plcl1"         "Epb41l5"       "Hs3st2"        "Slc16a3"      
##  [49] "1700019D03Rik" "Ehd2"          "Rgs7bp"        "Def6"          "Pycr1"         "Ets1"          "Satb1"         "Jag2"         
##  [57] "Napg"          "Ctnnbip1"      "Irf4"          "Faap100"       "Loxl2"         "Nfia"          "Tcn2"          "Shroom3"      
##  [65] "Gas2l3"        "Scd1"          "Bok"           "Kdm7a"         "Gm48768"       "Irs2"          "Pcx"           "Cmah"         
##  [73] "Abi2"          "Adgre5"        "Gmpr"          "Iqsec1"        "Pola1"         "Hbb-bt"        "Card11"        "Dusp7"        
##  [81] "Prdm8"         "E2f7"          "Pou2f1"        "Hvcn1"         "Icam2"         "1700120C14Rik" "Ano10"         "Igf2bp2"      
##  [89] "Mfsd4a"        "Pecr"          "Cit"           "Emp1"          "Egfl7"         "Klrb1c"        "Tmed6"         "Smtnl2"       
##  [97] "Pik3c2a"       "Atp2a3"        "Bhlhe40"       "Sema4b"        "Nab2"          "Tcf7l2"        "Jcad"          "Tbx19"        
## [105] "Slc37a2"       "Sema5a"        "Brdt"          "C730034F03Rik" "Nkd2"          "C2cd5"         "Abca2"         "Arhgef1"      
## [113] "Gm9227"        "S100a11"       "Slc47a1"       "Kank2"         "Ncs1"          "BC049352"      "Nek1"          "Fam160b1"     
## [121] "Abraxas1"      "Pla2g2f"       "Gm42047"       "Gdf11"         "Batf"          "Ccdc47"        "Crisp1"        "Pygl"         
## [129] "Ltb"           "0610040J01Rik" "A730063M14Rik" "Stradb"        "Cryl1"         "Rtn1"          "Ephx1"         "Cdc42ep4"     
## [137] "Accs"          "Rbms3"         "Slc44a1"       "4632427E13Rik" "Rcan3"         "St6gal1"       "Ccpg1"         "Rapgefl1"     
## [145] "Tnfrsf9"       "Glis2"         "Slc17a8"       "Bfsp2"         "Tmem176b"      "Fam8a1"        "E130308A19Rik" "Tmem158"      
## [153] "Mcm8"          "Rapgef3"       "Tsc22d3"       "Rasl10a"       "Chst15"        "Syndig1l"      "Map3k1"        "Srgap3"       
## [161] "Stox2"         "H2ac6"         "Vldlr"         "Usp6nl"        "Smpd1"         "Slc25a40"      "Kdm4c"         "Abcd3"        
## [169] "Mbd4"          "Tnni2"         "Plin3"         "Fhl1"          "Azin1"         "Prxl2c"        "Pcdh11x"       "Hes1"         
## [177] "Kif21b"        "Cth"           "Mybl1"         "Armt1"         "Izumo1r"       "Cdhr2"         "Mtss1"         "Cbx7"         
## [185] "Ccdc112"       "Zfp821"        "Cdhr4"         "Smyd4"         "Gm9993"        "Cradd"         "Uaca"          "Gm36684"      
## [193] "Gpt2"          "Hook3"         "Acp5"          "Wdr91"         "Arhgef17"      "Gm11824"       "Gfi1b"         "Hid1"         
## [201] "Gm26531"       "Bclaf3"        "Pcdh9"         "Ier3"          "Sel1l3"        "Abtb1"         "Gkap1"         "Rtkn2"        
## [209] "Sorcs2"        "Qrfp"          "Tasor2"        "Apbb2"         "Syngr1"        "Rprm"          "4933427D14Rik" "Ddx25"        
## [217] "Zfp318"        "Dalrd3"        "Prpf40b"       "Mpp3"          "Ribc1"         "Itga9"         "Zyg11b"        "Cavin1"       
## [225] "Sult1d1"       "Abcb4"         "Gm867"         "Angpt1"        "Grip1"         "Acy3"          "Mpp7"          "a"            
## [233] "Vps13b"        "Cpm"           "Txnrd2"        "Hs3st1"        "Ltbp4"         "Tulp4"         "Chd3"          "Pcbd1"        
## [241] "Nme3"          "Spats2"        "Dusp23"        "Slc7a3"        "Tctn1"         "Zfp820"        "Phldb1"        "Zfp956"       
## [249] "Dync2h1"       "Cdkal1"        "Six5"          "Ccdc88c"       "Nup160"        "Meis2"         "Zan"           "Gas2"         
## [257] "Ramp2"
setdiff(Deg3_neg, c(Deg1_neg, Deg2_neg))
##  [1] "Utp20"     "Snrpd3"    "Dleu2"     "Rbm19"     "Slfn2"     "Glb1l"     "Lmo7"      "Cep290"    "Maged1"    "Tbk1"      "Svip"     
## [12] "Rn7sk"     "Kmt2d"     "Iqce"      "Gm47304"   "Bmp2k"     "Clock"     "Eif4enif1" "Rmnd1"     "Enc1"      "Ap1ar"     "Sema7a"   
## [23] "Mir17hg"   "Relch"
cid <- 0
###### volcano plots for the publication ######

sessionInfo

sessionInfo()

# saving session info
sink(file.path(results_dir, paste0("sessionInfo_", project_name,".txt")))
sessionInfo()
sink()

renv::snapshot(lockfile = file.path(base_dir, paste0(project_name, "_renv.lock")))
renv::status(lockfile = file.path(base_dir, paste0(project_name, "_renv.lock")))
LS0tCnRpdGxlOiAiTksgY2VsbHMgaW1tdW5vZWRpdGluZyIKYXV0aG9yOiAiQWxla3NhbmRyIEJ5a292ICsgY29kZSBmcm9tIFBldGVyIFJlcGlzY2FrIgpkYXRlOiAnYHIgZm9ybWF0KFN5cy50aW1lKCksICIlZCAlQiAlWSIpYCcKb3V0cHV0OgogaHRtbF9kb2N1bWVudDoKICB0aGVtZTogcmVhZGFibGUKICBoaWdobGlnaHQ6IHRhbmdvCiAgY29kZV9mb2xkaW5nOiBoaWRlCiAgY29kZV9kb3dubG9hZDogdHJ1ZQojICB0b2M6IHRydWUKIyAgdG9jX2RlcHRoOiAyCiMgIHRvY19mbG9hdDogdHJ1ZQphbHdheXNfYWxsb3dfaHRtbDogdHJ1ZQpwYXJhbXM6CiAgY29uZmlnX2ZpbGU6ICJua190dW1faW1tdW5vZWRpdGluZ19jb25maWcueWFtbCIKLS0tCgojIFByb2plY3QgZGVzY3JpcHRpb24gCgpVbmNvdmVyaW5nIGltbXVuZSBldmFzaW9uIG1lY2hhbmlzbXMgb2YgbGV1a2VtaWMgY2VsbHMgZnJvbSBuYXR1cmFsIGtpbGxlciBjZWxscy4gSW4gdGhpcyBzdWItcHJvamVjdCBSTkEgc2VxdWVuY2luZyBvZiBsZXVrZW1pYyBjZWxscyBpcyB1c2VkIHRvIGRpc2NvdmVyIHRyYW5zY3JpcHRpb25hbCBjaGFuZ2VzIHJlc3VsdGluZyBpbiBOSyBjZWxsLXJlc2lzdGFuY2UuIFRoZXJlIGlzIGFub3RoZXIgc3ViLXByb2plY3QgdXNpbmcgQVRBQy1zZXEgKHRyYW5zcG9zYXNlLWFjY2Vzc2libGUgY2hyb21hdGluKSB0byBlcGlnZW5ldGljIGNoYW5nZXMgcmVzdWx0aW5nIGluIE5LIGNlbGwtcmVzaXN0YW5jZSAtIHRoaXMgaXMgbm90IHBhcnQgb2YgdGhpcyByZXBvcnQuCgojIyBFeHBlcmltZW50YWwgZGVzaWduCgpUYWtlbiBmcm9tIGRpc2N1c3Npb24gc3VtbWFyeSAoJzIxMDA5X1BvcmplY3QgZGlzY3Vzc2lvbiBzdW1tYXJ5LmRvY3gnKTogCgoqCTQgZGlmZmVyZW50IGNlbGwgbGluZXMgKHAxODUjMTNHLCBwMTg1IzEzSCwgcDE4NSMxNU0sIHAxODUjMTVPKS4gQUREIGRlc2NyaXB0aW9uIGFib3V0IGNlbGwgbGluZSBkaWZmZXJlbmNlcy4KKglBbGwgY29uZGl0aW9ucyBpbiB0ZWNobmljYWwgdHJpcGxpY2F0ZXMgYW5kIDIgYmlvbG9naWNhbCByZXBsaWNhdGVzIChiaW9sb2dpY2FsIHJlcGxpY2F0ZXMgY2FuIGJlIGFkZGVkIGFueSB0aW1lLCBpZiB0aGVyZSBpcyBhIG5lZWQpLgoqIFRlY2huaWNhbCByZXBsaWNhdGUgbWVhbnMgdGhhdCB3ZSBhbHJlYWR5IHN0YXJ0IHRoZSBjby1jdWx0dXJlIGluIGEgc2VwYXJhdGUgd2VsbC4KCiFbQmlvbG9naWNhbCBhbmQgdGVjaG5pY2FsIHJlcGxpY2F0ZXNdKE5LcHJvamVjdF9iaW9fdGVjaG5pY2FsX3JlcGxpY2F0ZXMucG5nKQoKIyMgVGltZWxpbmUgb2YgdGhlIGV4cGVyaW1lbnQKCioJT24gdGltZSBwb2ludCBEYXkgNCB3ZSBGQUNTIHNvcnQgdGhlIG9yaWdpbmFsIHR1bW9yIGNlbGxzIChsaWtlIERheSAwKSBhbmQgdGhlIHR1bW9yIGNlbGxzIHdoaWNoIHdlcmUgY28tY3VsdHVyZWQgZm9yIDQgZGF5cyB3aXRoIE5LIGNlbGxzIGZvciB0aGUgZm9sbG93aW5nIGFuYWx5c2VzOiBBVEFDIHNlcSwgUk5BIHNlcSBhbmQgYmFyY29kZSBhbmFseXNpcy4KQWRkaXRpb25hbGx5IHdlIGNvLWN1bHR1cmUgYSBjZXJ0YWluIGFtb3VudCBvZiB0aGUgc2FtZSB0dW1vciBjZWxscyBhZ2FpbiB3aXRoIE5LIGNlbGxzIGZvciB0aGUgYW5hbHlzaXMgb2YgRGF5IDEwLgoqCU9uIHRpbWUgcG9pbnQgRGF5IDEwIHdlIEZBQ1Mgc29ydCB0aGUgdHVtb3IgY2VsbHMgd2hpY2ggd2VyZSBjby1jdWx0dXJlZCBmb3IgMTAgZGF5cyB3aXRoIE5LIGNlbGxzIGZvciB0aGUgc2FtZSBhbmFseXNlczogQVRBQyBzZXEsIFJOQSBzZXEgYW5kIGJhcmNvZGUgYW5hbHlzaXMuCgohW0V4cGVyaW1lbnQgVGltZXBvaW50c10oTktwcm9qZWN0X2V4cGVyaW1lbnRfdGltZXBvaW50cy5wbmcpCgojIFJOQS1zZXEgZGF0YSBhbmFseXNpcyB7LnRhYnNldCAudGFic2V0LXBpbGxzfQoKUk5BLXNlcSBtZXRob2RzOiBRdWFudFNlcSAzJyBtUk5BLVNlcSAtIDF4NTBicCBIaVNlcSAzMDAwLzQwMDAuIAoKUHJlLXByb2Nlc3Npbmc6IEFERCBtZXRob2RzK3ZlcnNpb25zIGZvciBwcmVwcm9jZXNzaW5nCiFbUHJlLXByb2Nlc3NpbmcgcGlwZWxpbmVdKE5LcHJvamVjdF9wcmVwcm9jZXNzaW5nX3BpcGVsaW5lLnBuZykKCkRvd25zdHJlYW0gYW5hbHlzaXM6IEFERCBtZXRob2RzK3ZlcnNpb25zIGZvciBkb3duc3RyZWFtIQoKCltNdWx0aVFDIHJlcG9ydF0oTktwcm9qZWN0X292ZXJhbGxfbXVsdGlxYy5odG1sKQpBREQgZnVsbCBtdWx0aXFjIHJlcG9ydCBkZXNjcmlwdGlvbi4gRmV3IG9ic2VydmF0aW9uczoKCiogMycgYmlhcyAoZXhwZWN0ZWQgZm9yIFF1YW50LXNlcSkKKiBFWFA5LjggaGFzIGxvbmdlciByZWFkcyAoPykgMTIwYnAgYXMgb3Bwb3NlZCB0byA1MWJwIGZvciBldmVyeSBvdGhlciBleHBlcmltZW50LiBUaGlzIGlzIGFsc28gcmVmbGVjdGVkIGluIGEgaGlnaGVyIGFsaWdubWVudCB0byBjb2RpbmcgcmVnaW9uLiBGdXJ0aGVyLCBhIGNsZWFyIHNlcGFyYXRpb24gb2YgRVhQOS44IGZyb20gb3RoZXIgZXhwZXJpbWVudHMgaXMgb2JzZXJ2ZWQgb24gdGhlIFBDQSBwbG90LiBIb3dldmVyLCBhbHNvIEVYUDkuNyBpcyBzb21laG93IHNlcGFyYXRlZCAoYWxvbmcgdGhlIFBDMikgZnJvbSB0aGUgb3RoZXIgZXhwZXJpbWVudHMuIFRoZSBxdWFudGlmaWNhdGlvbiBhbmQgYWxpZ25tZW50IHdlcmUgZG9uZSB3aXRoIHRoZSBzYW1lIHNldHRpbmdzIGFsbCB0aGUgZXhwZXJpbWVudHMKCgpgYGB7ciBhbmFseXNpc19wYXJhbWV0ZXJzLCBpbmNsdWRlPUZBTFNFfQojIFNwZWNpZnlpbmcgcGFyYW1ldGVycyBoZXJlIC0tLS0KIyByZW1vdGVzOjppbnN0YWxsX2dpdGh1YigicnN0dWRpby9yZW52QHYwLjE2LjAiKQppZighKCJyZW52IiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygpKSl7aW5zdGFsbC5wYWNrYWdlcygicmVudiIsIHZlcnNpb249IjAuMTYuMCIpfSAgIyBkb2NrZXIgaGFzIDAuMTUuNDsgY2hhbmdlIGluIHByb2R1Y3Rpb24gaW1hZ2UhIAppZighKCJpbXBvcnQiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCkpKXtyZW52OjppbnN0YWxsKCJpbXBvcnQiKX0KaWYoISgieWFtbCIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoKSkpe3JlbnY6Omluc3RhbGwoInlhbWwiKX0KaWYoISgia25pdHIiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCkpKXtyZW52OjppbnN0YWxsKCJrbml0ciIpfQoKIyBzZXR0aW5nIHVwIGluaXRpYWwgcGFyYW1ldGVycwpiYXNlX2RpciA8LSAiL2hvbWUvcnN0dWRpby93b3Jrc3BhY2UvIgpkYXRhX2RpciA8LSBmaWxlLnBhdGgoYmFzZV9kaXIsICJkYXRhc2V0cyIpIApkaXIuY3JlYXRlKGRhdGFfZGlyKQphbm5vdGF0aW9uX2RpciA9IHBhc3RlMChkYXRhX2RpciwgIi9hbm5vdGF0aW9ucy8iKQpkaXIuY3JlYXRlKGFubm90YXRpb25fZGlyKQpyZXN1bHRzX2RpciA8LSBmaWxlLnBhdGgoYmFzZV9kaXIsICJyZXN1bHRzX2RpciIpIApkaXIuY3JlYXRlKHJlc3VsdHNfZGlyKQpzZXR3ZChiYXNlX2RpcikKCiMgcHJvamVjdCBzZXR1cAojIGxvYWRpbmcgY29uZmlnIGZpbGUgLS0tLQpjb25maWdfZmlsZSA8LSBmaWxlLnBhdGgoYmFzZV9kaXIsICJua190dW1faW1tdW5vZWRpdGluZ19jb25maWcueWFtbCIpICAKY29uZmlnIDwtIHlhbWw6OnJlYWRfeWFtbChjb25maWdfZmlsZSkKCiNwcm9qZWN0IHNldHVwCnByb2plY3RfbmFtZSA8LSBjb25maWckcHJvamVjdF9uYW1lCm50aHJlYWRzIDwtIGNvbmZpZyRudGhyZWFkcyAjIGUuZy4gNCBmb3IgZnVycnIgbXVsdGlzZXNzaW9uCgojIHRoaXMgbWFrZSB0YWtlIGEgd2hpbGUuLi4KIyBodHRwczovL3JzdHVkaW8uZ2l0aHViLmlvL3JlbnYvcmVmZXJlbmNlL2NvbmZpZy5odG1sCiMgZ2V0T3B0aW9uKHgsIGRlZmF1bHQgPSBOVUxMKQojIHJlbnY6OnNldHRpbmdzJHVzZS5jYWNoZSgpCiMgZ2V0T3B0aW9uKCdyZW52LmNvbmZpZy5wYWsuZW5hYmxlZCcpCiMgaWYgdGhlIHByb2plY3QgZG9lcyBub3QgYXV0b21hdGljYWxseSBhY3RpdmF0ZSBydW46CmlmKFN5cy5nZXRlbnYoIlJFTlZfUEFUSFNfQ0FDSEUiKSAhPSAiL3JlbnZfY2FjaGUiKSB7U3lzLnNldGVudihSRU5WX1BBVEhTX0NBQ0hFID0gIi9yZW52X2NhY2hlIil9CmlmKFN5cy5nZXRlbnYoIlJFTlZfUEFUSFNfTElCUkFSWSIpICE9ICIvaG9tZS9yc3R1ZGlvL3JlbnZfbGlicmFyeSIpIHtTeXMuc2V0ZW52KFJFTlZfUEFUSFNfTElCUkFSWSA9ICIvaG9tZS9yc3R1ZGlvL3JlbnZfbGlicmFyeSIpfQojaWYoU3lzLmdldGVudigiUkVOVl9DT05GSUdfUEFLX0VOQUJMRUQiKSAhPSAiVFJVRSIpIHtTeXMuc2V0ZW52KFJFTlZfQ09ORklHX1BBS19FTkFCTEVEID0gVFJVRSl9ICMgYWRkIHBhaywgdGFyZ2V0cyBhbmQgYmVuY2htYXJrbWUgdG8gZG9ja2VyISEhCiMgc2V0dGluZyByb290IGRpcgojIDAuIHJlbnY6OmFjdGl2YXRlCnJlbnY6OmFjdGl2YXRlKHByb2plY3QgPSBiYXNlX2RpcikKCiNJRiBwYWNrYWdlcyBhcmUgbm90IGxvYWRlZCBwcm9wZXJseSwgdXNlOiAKIyByZW52OjpyZXN0b3JlKCkKCiMgT25seSBmb3IgbWFudWFsIGluc3RhbGxhdGlvbiBvZiB0aGUgcHJvamVjdCBhbmQgcmVjb3ZlcnkgZnJvbSB0aGUgbG9jayBmaWxlLgojIHJlbnY6OmluaXQocHJvamVjdCA9IGJhc2VfZGlyLCBiYXJlPVRSVUUsIGJpb2NvbmR1Y3RvciA9ICIzLjE2IikKIyBGT1IgdGhlIGZpcnN0IHJ1biByZXN0b3JlIGVudmlyb25tZW50IGFuZCBwYWNrYWdlcyBmcm9tIHJlbnYubG9jayAKIyAxLiByZXN0b3JlIG9yaWdpbmFsIGVudmlyb25tZW50CiMgcmVudjo6cmVzdG9yZShwcm9qZWN0ID0gYmFzZV9kaXIsIGxvY2tmaWxlID0gZmlsZS5wYXRoKGJhc2VfZGlyLCBwYXN0ZTAocHJvamVjdF9uYW1lLCAiX3JlbnYubG9jayIpKSwgcHJvbXB0ID0gRkFMU0UpCiMgZmlsZS5jb3B5KGZyb20gPSBmaWxlLnBhdGgoYmFzZV9kaXIsIHBhc3RlMChwcm9qZWN0X25hbWUsICJfcmVudi5sb2NrIikpLCB0byA9IGZpbGUucGF0aChiYXNlX2RpciwgInJlbnYubG9jayIpKSAjIHRvIHVzZSB3aXRoIHJlbnY6OmRpYWdub3N0aWNzKCkKCiMgZm9yIHNvbWUgcmVhc29uIGFwZWdsbSBwYWNrYWdlIGNhbm5vdCBiZSBzcGFuc2hvdHRlZCB0byB0aGUgcmVudi5sb2NrIGZpbGUuIHNvLCBpdCBtdXN0IGJlIGluc3RhbGxlZCBtYW51YWxseS4KIyBCaW9jTWFuYWdlcjo6aW5zdGFsbCgiYXBlZ2xtIikKCmBgYAoKYGBge3IgaW5pdGlhbF9zZXR1cCwgaW5jbHVkZT1GQUxTRX0KIyBtb3ZlIHRvIGFuIGludGVybmFsIGhpZGRlbiBjaHVuaz8KCnNldC5zZWVkKDQyKQoKaW1wb3J0Ojpmcm9tKGtuaXRyLCBvcHRzX2NodW5rKQoKIyByZXBvcnQgb3B0aW9ucyAtLS0tCiMga25pdHI6Om9wdHNfY2h1bmsKb3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsCiAgICAgICAgICAgICAgICAgICAgICBldmFsID0gVFJVRSwKICAgICAgICAgICAgICAgICAgICAgIGNhY2hlLmxhenkgPSBGQUxTRSwKICAgICAgICAgICAgICAgICAgICAgIG1lc3NhZ2U9RkFMU0UsCiAgICAgICAgICAgICAgICAgICAgICB3YXJuaW5nPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgZGV2ID0gInBuZyIpCgojIGNhY2hlLnBhdGggPSBmaWxlLnBhdGgocmVzdWx0c19kaXIsInJlcG9ydCIsImNhY2hlLyIpCiMgZmlnLnBhdGggPSBmaWxlLnBhdGgocmVzdWx0c19kaXIsInJlcG9ydCIsImZpbGVzLyIpICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgIApvcHRpb25zKHdpZHRoPTEwMCkKCmBgYAoKCmBgYHtyIHJ1biBpbiB0aGUgdGVybWluYWwsIGV2YWwgPSBGQUxTRSwgaW5jbHVkZSA9IEZBTFNFfQojIHJ1biBpbiB0ZXJtaW5hbCB0byBnZW5lcmF0ZSByZXBvcnQKcm1hcmtkb3duOjpyZW5kZXIob3V0cHV0X2ZpbGUgPSBmaWxlLnBhdGgoYmFzZV9kaXIsICJua190dW1faW1tdW5vZWRpdGluZy5odG1sIiksIAogICAgICAgICAgICAgICAgICBpbnB1dCA9IGhlcmU6OmhlcmUoIm5rX3R1bV9pbW11bm9lZGl0aW5nLlJtZCIpKQpgYGAKCmBgYHtyIGxvYWRpbmcgbGlicmFyaWVzLCBtZXNzYWdlPUZBTFNFLCBldmFsPVRSVUUsIGluY2x1ZGU9VFJVRX0KIyBpbXBvcnRpbmcgb25seSBrZXkgZnVuY3Rpb25zIHRoYXQgYXJlIGFjdHVhbGx5IHVzZWQgLSBub3QgdG8gcG9sdXRlIG5hbWVzcGFjZSEKaW1wb3J0Ojpmcm9tKHJlYWRyLCByZWFkX2NzdikKaW1wb3J0Ojpmcm9tKG1hZ3JpdHRyLCAiJT4lIikKaW1wb3J0Ojpmcm9tKGRwbHlyLCBtdXRhdGUsIHNlbGVjdCwgZmlsdGVyLCByZW5hbWUsIGFycmFuZ2UsIGRlc2MsIGdyb3VwX2J5LCBzdW1tYXJpc2UsIHVuZ3JvdXApICAjIGRwbHlyX211dGF0ZSA9IG11dGF0ZQppbXBvcnQ6OmZyb20ocHVycnIsIG1hcCkKaW1wb3J0Ojpmcm9tKGZ1dHVyZSwgcGxhbiwgbXVsdGlzZXNzaW9uLCBzZXF1ZW50aWFsKQppbXBvcnQ6OmZyb20oZnVycnIsIGZ1cnJyX29wdGlvbnMsIGZ1dHVyZV9tYXAyKQppbXBvcnQ6OmZyb20oZ2dwbG90MiwgLmFsbD1UUlVFKSAjIGltcG9ydGluZyBhbGwgYXMgdGhlcmUgaXMgdG9vIG1hbnkKaW1wb3J0Ojpmcm9tKGdyaWQsIGdwYXIpICMgbmVlZGVkIGluIGNvbXBsZXhoZWF0bWFwCmltcG9ydDo6ZnJvbShrYWJsZUV4dHJhLCBrYWJsZV9zdHlsaW5nLCBrYmwpCmltcG9ydDo6ZnJvbSguZnJvbSA9IFN1bW1hcml6ZWRFeHBlcmltZW50LCBjb2xEYXRhLCBhc3NheSkgIyB1c2VkIGluIGV2ZXJ5IC5SbWQKaW1wb3J0Ojpmcm9tKC5mcm9tID0gdGFibGVvbmUsIENyZWF0ZVRhYmxlT25lKQoKYGBgCgojIyBQcmVwYXJpbmcgZGF0YXNldHMgCgpgYGB7ciwgY2hpbGQ9aGVyZTo6aGVyZSgiMDBfcHJlcGFyZV9kYXRhc2V0cy5SbWQiKSwgaW5jbHVkZT1UUlVFLCBldmFsID0gVFJVRX0KYGBgCgojIyBFeHBsb3JhdG9yeSBEYXRhIEFuYWx5c2lzIC0gbWV0YWRhdGEKCmBgYHtyLCBjaGlsZD1oZXJlOjpoZXJlKCIwMV9lZGFfbWV0YWRhdGEuUm1kIiksIGluY2x1ZGU9VFJVRSwgZXZhbCA9IFRSVUV9CmBgYAoKIyMgRXhwbG9yYXRvcnkgRGF0YSBBbmFseXNpcyAtIGV4cHJlc3Npb24KCmBgYHtyLCBjaGlsZD1oZXJlOjpoZXJlKCIwMl9lZGFfZXhwcmVzc2lvbi5SbWQiKSwgaW5jbHVkZT1UUlVFLCBldmFsID0gVFJVRX0KYGBgCgojIyBSZXN1bHRzIC0gVHBOS190cDJfdnNfVG9ubHlfdHAxIAoKYGBge3IsIGNoaWxkPWhlcmU6OmhlcmUoIjAzX3Jlc3VsdHNfVHBOS190cDJfdnNfVG9ubHlfdHAxX0FsZWtzX0FCX0NEX3NlcGFyYXRlZC5SbWQiKSwgaW5jbHVkZT1UUlVFLCBldmFsID0gVFJVRX0KYGBgCgojIyBSZXN1bHRzIC0gRVhQOS44IGNvbXBhcmluZyBXVCB2cyBLTywgSUZORyBhbmQgUHJmS09fTksgdGltZXBvaW50cwoKYGBge3IsIGNoaWxkPWhlcmU6OmhlcmUoIjA0X2V4cDkuOF9LT19XVF9zdGltX2NvbXBhcmlzb24uUm1kIiksIGluY2x1ZGU9VFJVRSwgZXZhbCA9IFRSVUV9CmBgYAoKIyBzZXNzaW9uSW5mbwpgYGB7ciBpbmNsdWRlPVRSVUUsIGV2YWw9RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CnNlc3Npb25JbmZvKCkKCiMgc2F2aW5nIHNlc3Npb24gaW5mbwpzaW5rKGZpbGUucGF0aChyZXN1bHRzX2RpciwgcGFzdGUwKCJzZXNzaW9uSW5mb18iLCBwcm9qZWN0X25hbWUsIi50eHQiKSkpCnNlc3Npb25JbmZvKCkKc2luaygpCgpyZW52OjpzbmFwc2hvdChsb2NrZmlsZSA9IGZpbGUucGF0aChiYXNlX2RpciwgcGFzdGUwKHByb2plY3RfbmFtZSwgIl9yZW52LmxvY2siKSkpCnJlbnY6OnN0YXR1cyhsb2NrZmlsZSA9IGZpbGUucGF0aChiYXNlX2RpciwgcGFzdGUwKHByb2plY3RfbmFtZSwgIl9yZW52LmxvY2siKSkpCgpgYGAK